From 5ecc69e49c3794db5a477165fb82d3715342db64 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Fri, 23 Nov 2007 15:40:59 -0500 Subject: [PATCH] More Makefile cleanups, otherwise mainly noticeable are the netfilter fix and the LVM update. Linus - Christoph Rohland: shmfs cleanup - Nicolas Pitre: don't forget loop.c flags - Geert Uytterhoeven: new-style m68k Makefiles - Neil Brown: knfsd cleanups, raid5 re-org - Andrea Arkangeli: update to LVM-0.9 - LC Chang: sis900 driver doc update - David Miller: netfilter oops fix - Andrew Grover: acpi update --- Documentation/Configure.help | 4 +- Documentation/DocBook/Makefile | 6 +- Documentation/DocBook/kernel-locking.tmpl | 2 +- Documentation/DocBook/sis900.tmpl | 585 +++++++ Documentation/networking/sis900.txt | 84 +- Makefile | 4 +- arch/alpha/config.in | 1 + arch/i386/kernel/Makefile | 1 - arch/i386/kernel/acpi.c | 53 - arch/i386/kernel/setup.c | 4 +- arch/i386/math-emu/fpu_system.h | 2 +- arch/m68k/amiga/Makefile | 10 +- arch/m68k/apollo/Makefile | 2 +- arch/m68k/atari/Makefile | 12 +- arch/m68k/bvme6000/Makefile | 4 +- arch/m68k/hp300/Makefile | 9 +- arch/m68k/kernel/Makefile | 12 +- arch/m68k/lib/Makefile | 4 +- arch/m68k/mac/Makefile | 8 +- arch/m68k/math-emu/Makefile | 5 +- arch/m68k/mm/Makefile | 7 +- arch/m68k/mvme147/Makefile | 2 +- arch/m68k/mvme16x/Makefile | 6 +- arch/m68k/q40/Makefile | 2 +- arch/m68k/sun3/Makefile | 7 +- arch/m68k/sun3x/Makefile | 4 +- arch/ppc/amiga/Makefile | 6 +- drivers/acpi/Makefile | 2 +- drivers/acpi/cmbatt.c | 351 +++- drivers/acpi/common/cmcopy.c | 4 +- drivers/acpi/common/cminit.c | 24 +- drivers/acpi/common/cmobject.c | 4 +- drivers/acpi/dispatcher/dswstate.c | 4 +- drivers/acpi/driver.c | 49 +- drivers/acpi/driver.h | 4 +- drivers/acpi/ec.c | 508 ++++-- drivers/acpi/ec.h | 100 ++ drivers/acpi/events/evevent.c | 10 +- drivers/acpi/events/evregion.c | 20 +- drivers/acpi/events/evxfregn.c | 4 +- drivers/acpi/hardware/hwacpi.c | 12 +- drivers/acpi/hardware/hwregs.c | 30 +- 
drivers/acpi/include/acevents.h | 5 +- drivers/acpi/include/aclinux.h | 18 +- drivers/acpi/include/aclocal.h | 9 +- drivers/acpi/include/acmacros.h | 18 +- drivers/acpi/include/acnamesp.h | 3 +- drivers/acpi/include/acpiosxf.h | 14 +- drivers/acpi/include/acpixf.h | 2 +- drivers/acpi/include/actypes.h | 25 +- drivers/acpi/interpreter/ammonad.c | 66 +- drivers/acpi/interpreter/amprep.c | 20 +- drivers/acpi/interpreter/amutils.c | 4 +- drivers/acpi/namespace/nsaccess.c | 6 +- drivers/acpi/namespace/nssearch.c | 12 +- drivers/acpi/parser/psargs.c | 4 +- drivers/acpi/parser/psparse.c | 18 +- drivers/acpi/power.c | 137 ++ drivers/acpi/sys.c | 20 +- drivers/block/loop.c | 2 +- drivers/char/Config.in | 2 +- drivers/char/Makefile | 2 + drivers/char/agp/agp.h | 7 + drivers/char/agp/agpgart_be.c | 71 +- drivers/md/lvm-snap.c | 216 ++- drivers/md/lvm.c | 1398 +++++++++++----- drivers/md/raid5.c | 1757 ++++++++------------- drivers/media/Makefile | 1 + drivers/net/pcmcia/Makefile | 8 +- drivers/net/sis900.c | 438 ++++- drivers/net/sis900.h | 13 +- drivers/pci/pci.ids | 2 + drivers/sbus/Makefile | 2 + drivers/zorro/Makefile | 14 +- fs/buffer.c | 11 +- fs/exec.c | 2 - fs/namei.c | 3 +- fs/nfsd/nfs3xdr.c | 20 +- fs/nfsd/vfs.c | 29 +- include/linux/acpi.h | 1 + include/linux/agp_backend.h | 1 + include/linux/lvm.h | 922 ++++++----- include/linux/mm.h | 1 - include/linux/raid/md_k.h | 1 + include/linux/raid/raid5.h | 197 ++- ipc/shm.c | 70 +- kernel/Makefile | 2 +- kernel/ksyms.c | 3 + kernel/softirq.c | 7 +- mm/filemap.c | 11 + mm/mlock.c | 6 - mm/mmap.c | 133 +- mm/mprotect.c | 3 - mm/mremap.c | 7 +- mm/shmem.c | 24 +- mm/swapfile.c | 3 +- net/ipv4/ip_fragment.c | 4 +- 97 files changed, 5023 insertions(+), 2724 deletions(-) create mode 100644 Documentation/DocBook/sis900.tmpl delete mode 100644 arch/i386/kernel/acpi.c create mode 100644 drivers/acpi/ec.h create mode 100644 drivers/acpi/power.c diff --git a/Documentation/Configure.help b/Documentation/Configure.help index 
60ff8b1d2e8b..078b30045ade 100644 --- a/Documentation/Configure.help +++ b/Documentation/Configure.help @@ -2352,10 +2352,10 @@ CONFIG_AGP module, say M here and read Documentation/modules.txt. The module will be called agpgart.o. -Intel 440LX/BX/GX support +Intel 440LX/BX/GX/815/840/850 support CONFIG_AGP_INTEL This option gives you AGP support for the GLX component of the - XFree86 4.x on Intel 440LX/BX/GX, 815, and 840 chipsets. + XFree86 4.x on Intel 440LX/BX/GX, 815, 840 and 850 chipsets. For the moment, you should probably say N, unless you want to test the GLX component for XFree86 3.3.6, which can be downloaded from diff --git a/Documentation/DocBook/Makefile b/Documentation/DocBook/Makefile index 113aa69fa658..81e9a401aee9 100644 --- a/Documentation/DocBook/Makefile +++ b/Documentation/DocBook/Makefile @@ -1,6 +1,6 @@ BOOKS := wanbook.sgml z8530book.sgml mcabook.sgml videobook.sgml \ kernel-api.sgml parportbook.sgml kernel-hacking.sgml \ - kernel-locking.sgml via-audio.sgml mousedrivers.sgml + kernel-locking.sgml via-audio.sgml mousedrivers.sgml sis900.sgml PS := $(patsubst %.sgml, %.ps, $(BOOKS)) PDF := $(patsubst %.sgml, %.pdf, $(BOOKS)) @@ -51,6 +51,10 @@ via-audio.sgml: via-audio.tmpl $(TOPDIR)/drivers/sound/via82cxxx_audio.c $(TOPDIR)/scripts/docgen $(TOPDIR)/drivers/sound/via82cxxx_audio.c \ via-audio.sgml +sis900.sgml: sis900.tmpl $(TOPDIR)/drivers/net/sis900.c + $(TOPDIR)/scripts/docgen $(TOPDIR)/drivers/net/sis900.c \ + sis900.sgml + mcabook.sgml: mcabook.tmpl $(TOPDIR)/arch/i386/kernel/mca.c $(TOPDIR)/scripts/docgen $(TOPDIR)/arch/i386/kernel/mca.c \ mcabook.sgml diff --git a/Documentation/DocBook/kernel-locking.tmpl b/Documentation/DocBook/kernel-locking.tmpl index 9ea1fabe777e..cfce2afd38a3 100644 --- a/Documentation/DocBook/kernel-locking.tmpl +++ b/Documentation/DocBook/kernel-locking.tmpl @@ -155,7 +155,7 @@ - add 1 (5) + add 1 (6) write very_important_count (6) diff --git a/Documentation/DocBook/sis900.tmpl 
b/Documentation/DocBook/sis900.tmpl new file mode 100644 index 000000000000..88c8ff6a3d06 --- /dev/null +++ b/Documentation/DocBook/sis900.tmpl @@ -0,0 +1,585 @@ + + + + + + +SiS 900/7016 Fast Ethernet Device Driver + + + +Ollie +Lho + + + +Lei Chun +Chang + + + +Document Revision: 0.3 for SiS900 driver v1.06 & v1.07 +November 16, 2000 + + + 1999 + Silicon Integrated System Corp. + + + + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + + + + + +This document gives some information on installation and usage of SiS 900/7016 +device driver under Linux. + + + + + + + + + Introduction + + +This document describes the revision 1.06 and 1.07 of SiS 900/7016 Fast Ethernet +device driver under Linux. The driver is developed by Silicon Integrated +System Corp. and distributed freely under the GNU General Public License (GPL). +The driver can be compiled as a loadable module and used under Linux kernel +version 2.2.x. (rev. 1.06) +With minimal changes, the driver can also be used under 2.3.x and 2.4.x kernel +(rev. 1.07), please see +. If you are intended to +use the driver for earlier kernels, you are on your own. + + + +The driver is tested with usual TCP/IP applications including +FTP, Telnet, Netscape etc. and is used constantly by the developers. + + + +Please send all comments/fixes/questions to +Lei-Chun Chang. 
+ + + + + Changes + + +Changes made in Revision 1.07 + + + + +Separation of sis900.c and sis900.h in order to move most +constant definition to sis900.h (many of those constants were +corrected) + + + + + +Clean up PCI detection, the pci-scan from Donald Becker were not used, +just simple pci_find_*. + + + + + +MII detection is modified to support multiple mii transceiver. + + + + + +Bugs in read_eeprom, mdio_* were removed. + + + + + +Lot of sis900 irrelevant comments were removed/changed and +more comments were added to reflect the real situation. + + + + + +Clean up of physical/virtual address space mess in buffer +descriptors. + + + + + +Better transmit/receive error handling. + + + + + +The driver now uses zero-copy single buffer management +scheme to improve performance. + + + + + +Names of variables were changed to be more consistent. + + + + + +Clean up of auto-negotiation and timer code. + + + + + +Automatic detection and change of PHY on the fly. + + + + + +Bug in mac probing fixed. + + + + + +Fix 630E equalizer problem by modifying the equalizer workaround rule. + + + + + +Support for ICS1893 10/100 Integrated PHYceiver. + + + + + +Support for media select by ifconfig. + + + + + +Added kernel-doc extractable documentation. 
+ + + + + + + + + Tested Environment + + +This driver is developed on the following hardware + + + + + +Intel Celeron 500 with SiS 630 (rev 02) chipset + + + + + +SiS 900 (rev 01) and SiS 7016/7014 Fast Ethernet Card + + + + + +and tested with these software environments + + + + + +Red Hat Linux version 6.2 + + + + + +Linux kernel version 2.4.0 + + + + + +Netscape version 4.6 + + + + + +NcFTP 3.0.0 beta 18 + + + + + +Samba version 2.0.3 + + + + + + + + + + +Files in This Package + + +In the package you can find these files: + + + + + + +sis900.c + + +Driver source file in C + + + + + +sis900.h + + +Header file for sis900.c + + + + + +sis900.sgml + + +DocBook SGML source of the document + + + + + +sis900.txt + + +Driver document in plain text + + + + + + + + + + Installation + + +Silicon Integrated System Corp. is cooperating closely with core Linux Kernel +developers. The revisions of SiS 900 driver are distributed by the usual channels +for kernel tar files and patches. Those kernel tar files for official kernel and +patches for kernel pre-release can be downloaded at +official kernel ftp site +and its mirrors. +The 1.06 revision can be found in kernel version later than 2.3.15 and pre-2.2.14, +and 1.07 revision can be found in kernel version 2.4.0. +If you have no prior experience in networking under Linux, please read +Ethernet HOWTO and +Networking HOWTO available from +Linux Documentation Project (LDP). + + + +The driver is bundled in release later than 2.2.11 and 2.3.15 so this +is the easiest case. +Be sure you have the appropriate packages for compiling kernel source. +Those packages are listed in Documentation/Changes in kernel source +distribution. If you have to install the driver other than those bundled +in kernel release, you should have your driver file +sis900.c and sis900.h +copied into /usr/src/linux/drivers/net/ first. 
+There are two alternative ways to install the driver + + + +Building the driver as loadable module + + +To build the driver as a loadable kernel module you have to reconfigure +the kernel to activate network support by + + + +make menuconfig + + + +Choose Loadable module support --->, +then select Enable loadable module support. + + + +Choose Network Device Support --->, select +Ethernet (10 or 100Mbit). +Then select EISA, VLB, PCI and on board controllers, +and choose SiS 900/7016 PCI Fast Ethernet Adapter support +to M. + + + +After reconfiguring the kernel, you can make the driver module by + + + +make modules + + + +The driver should be compiled with no errors. After compiling the driver, +the driver can be installed to proper place by + + + +make modules_install + + + +Load the driver into kernel by + + + +insmod sis900 + + + +When loading the driver into memory, some information message can be view by + + + + +dmesg + + +or + + +cat /var/log/message + + + + +If the driver is loaded properly you will have messages similar to this: + + + +sis900.c: v1.07.06 11/07/2000 +eth0: SiS 900 PCI Fast Ethernet at 0xd000, IRQ 10, 00:00:e8:83:7f:a4. +eth0: SiS 900 Internal MII PHY transceiver found at address 1. +eth0: Using SiS 900 Internal MII PHY as default + + + +showing the version of the driver and the results of probing routine. + + + +Once the driver is loaded, network can be brought up by + + + +/sbin/ifconfig eth0 IPADDR broadcast BROADCAST netmask NETMASK media TYPE + + + +where IPADDR, BROADCAST, NETMASK are your IP address, broadcast address and +netmask respectively. TYPE is used to set medium type used by the device. +Typical values are "10baseT"(twisted-pair 10Mbps Ethernet) or "100baseT" +(twisted-pair 100Mbps Ethernet). For more information on how to configure +network interface, please refer to +Networking HOWTO. + + + +The link status is also shown by kernel messages. 
For example, after the +network interface is activated, you may have the message: + + + +eth0: Media Link On 100mbps full-duplex + + + +If you try to unplug the twisted pair (TP) cable you will get + + + +eth0: Media Link Off + + + +indicating that the link has failed. + + + + +Building the driver into kernel + + +If you want to make the driver into kernel, choose Y +rather than M on +SiS 900/7016 PCI Fast Ethernet Adapter support +when configuring the kernel. Build the kernel image in the usual way + + + +make dep + +make clean + +make bzlilo + + + +Next time the system reboots, you have the driver in memory. + + + + + + + Known Problems and Bugs + + +There are some known problems and bugs. If you find any other bugs please +mail to lcchang@sis.com.tw + + + + + +AM79C901 HomePNA PHY is not thoroughly tested, there may be some +bugs in the on the fly change of transceiver. + + + + + +A bug is hidden somewhere in the receive buffer management code, +the bug causes NULL pointer reference in the kernel. This fault is +caught before bad things happen and reported with the message: + + +eth0: NULL pointer encountered in Rx ring, skipping + + +which can be viewed with dmesg or +cat /var/log/message. + + + + + +The media type change from 10Mbps to 100Mbps twisted-pair ethernet +by ifconfig causes the media link down. + + + + + + + + + Revision History + + + + + + +November 13, 2000, Revision 1.07, seventh release, 630E problem fixed +and further clean up. + + + + + +November 4, 1999, Revision 1.06, Second release, lots of clean up +and optimization. + + + + + +August 8, 1999, Revision 1.05, Initial Public Release + + + + + + + + + Acknowledgements + + +This driver was originally derived from +Donald Becker's +pci-skeleton and +rtl8139 drivers. Donald also provided various suggestions +regarding improvements made in revision 1.06. + + + +The 1.05 revision was created by +Jim Huang, AMD 79c901 +support was added by Chin-Shan Li. 
+ + + + +List of Functions +!Idrivers/net/sis900.c + + + diff --git a/Documentation/networking/sis900.txt b/Documentation/networking/sis900.txt index b6de27947bf2..6e864fe53e29 100644 --- a/Documentation/networking/sis900.txt +++ b/Documentation/networking/sis900.txt @@ -5,14 +5,43 @@ Ollie Lho Lei Chun Chang - November 16, 2000. Document Revision: 0.3 + Copyright © 1999 by Silicon Integrated System Corp. This document gives some information on installation and usage of SiS 900/7016 device driver under Linux. + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or (at + your option) any later version. + + This program is distributed in the hope that it will be useful, but + WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 + USA _________________________________________________________________ + + Table of Contents + 1. Introduction + 2. Changes + 3. Tested Environment + 4. Files in This Package + 5. Installation + + Building the driver as loadable module + Building the driver into kernel + + 6. Known Problems and Bugs + 7. Revision History + 8. Acknowledgements _________________________________________________________________ -Introduction +Chapter 1. Introduction This document describes the revision 1.06 and 1.07 of SiS 900/7016 Fast Ethernet device driver under Linux. The driver is developed by @@ -20,9 +49,8 @@ Introduction General Public License (GPL). The driver can be compiled as a loadable module and used under Linux kernel version 2.2.x. (rev. 
1.06) With minimal changes, the driver can also be used under 2.3.x and 2.4.x - kernel (rev. 1.07), please see the section called Installation. If you - are intended to use the driver for earlier kernels, you are on your - own. + kernel (rev. 1.07), please see Chapter 5. If you are intended to use + the driver for earlier kernels, you are on your own. The driver is tested with usual TCP/IP applications including FTP, Telnet, Netscape etc. and is used constantly by the developers. @@ -30,27 +58,7 @@ Introduction Please send all comments/fixes/questions to Lei-Chun Chang. _________________________________________________________________ -License - - Copyright (C) 1999 Silicon Integrated System Corp. - - This program is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation; either version 2 of the License, or - (at your option) any later version. - - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with this program; if not, write to the Free Software - Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 U -SA - _________________________________________________________________ - -Changes +Chapter 2. Changes Changes made in Revision 1.07 @@ -75,9 +83,10 @@ Changes rule. 14. Support for ICS1893 10/100 Interated PHYceiver. 15. Support for media select by ifconfig. + 16. Added kernel-doc extratable documentation. _________________________________________________________________ -Tested Environment +Chapter 3. 
Tested Environment This driver is developed on the following hardware @@ -93,7 +102,7 @@ Tested Environment * Samba version 2.0.3 _________________________________________________________________ -Files in This Package +Chapter 4. Files in This Package In the package you can find these files: @@ -110,7 +119,7 @@ Files in This Package Driver document in plain text _________________________________________________________________ -Installation +Chapter 5. Installation Silicon Integrated System Corp. is cooperating closely with core Linux Kernel developers. The revisions of SiS 900 driver are distributed by @@ -179,11 +188,14 @@ eth0: Using SiS 900 Internal MII PHY as default Once the driver is loaded, network can be brought up by -/sbin/ifconfig eth0 IPADDR broadcast BROADCAST netmask NETMASK +/sbin/ifconfig eth0 IPADDR broadcast BROADCAST netmask NETMASK media TYPE where IPADDR, BROADCAST, NETMASK are your IP address, broadcast - address and netmask respectively. For more information on how to - configure network interface, please refer to Networking HOWTO. + address and netmask respectively. TYPE is used to set medium type used + by the device. Typical values are "10baseT"(twisted-pair 10Mbps + Ethernet) or "100baseT" (twisted-pair 100Mbps Ethernet). For more + information on how to configure network interface, please refer to + Networking HOWTO. The link status is also shown by kernel messages. For example, after the network interface is activated, you may have the message: @@ -212,7 +224,7 @@ make bzlilo Next time the system reboot, you have the driver in memory. _________________________________________________________________ -Known Problems and Bugs +Chapter 6. Known Problems and Bugs There are some known problems and bugs. 
If you find any other bugs please mail to lcchang@sis.com.tw @@ -224,9 +236,11 @@ Known Problems and Bugs caught before bad things happen and reported with the message: eth0: NULL pointer encountered in Rx ring, skipping which can be viewed with dmesg or cat /var/log/message. + 3. The media type change from 10Mbps to 100Mbps twisted-pair ethernet + by ifconfig causes the media link down. _________________________________________________________________ -Revision History +Chapter 7. Revision History * November 13, 2000, Revision 1.07, seventh release, 630E problem fixed and furthur clean up. @@ -235,7 +249,7 @@ Revision History * August 8, 1999, Revision 1.05, Initial Public Release _________________________________________________________________ -Acknowledgements +Chapter 8. Acknowledgements This driver was originally derived form Donald Becker's pci-skeleton and rtl8139 drivers. Donald also provided various suggestion regarded diff --git a/Makefile b/Makefile index 3dc4f3bbe004..6417c2c6869a 100644 --- a/Makefile +++ b/Makefile @@ -1,7 +1,7 @@ VERSION = 2 PATCHLEVEL = 4 SUBLEVEL = 0 -EXTRAVERSION = -test13-pre3 +EXTRAVERSION = -test13-pre4 KERNELRELEASE=$(VERSION).$(PATCHLEVEL).$(SUBLEVEL)$(EXTRAVERSION) @@ -159,7 +159,7 @@ DRIVERS-$(CONFIG_PCMCIA_NETCARD) += drivers/net/pcmcia/pcmcia_net.o DRIVERS-$(CONFIG_PCMCIA_CHRDEV) += drivers/char/pcmcia/pcmcia_char.o DRIVERS-$(CONFIG_DIO) += drivers/dio/dio.a DRIVERS-$(CONFIG_SBUS) += drivers/sbus/sbus_all.o -DRIVERS-$(CONFIG_ZORRO) += drivers/zorro/zorro.a +DRIVERS-$(CONFIG_ZORRO) += drivers/zorro/driver.o DRIVERS-$(CONFIG_FC4) += drivers/fc4/fc4.a DRIVERS-$(CONFIG_ALL_PPC) += drivers/macintosh/macintosh.o DRIVERS-$(CONFIG_MAC) += drivers/macintosh/macintosh.o diff --git a/arch/alpha/config.in b/arch/alpha/config.in index 649047e6659e..fe0471a26356 100644 --- a/arch/alpha/config.in +++ b/arch/alpha/config.in @@ -3,6 +3,7 @@ # see Documentation/kbuild/config-language.txt. 
# +define_bool CONFIG_ALPHA y define_bool CONFIG_UID16 n mainmenu_name "Kernel configuration of Linux for Alpha machines" diff --git a/arch/i386/kernel/Makefile b/arch/i386/kernel/Makefile index cb41a22bbde5..6e7535a70abd 100644 --- a/arch/i386/kernel/Makefile +++ b/arch/i386/kernel/Makefile @@ -40,6 +40,5 @@ obj-$(CONFIG_SMP) += smp.o smpboot.o trampoline.o obj-$(CONFIG_X86_LOCAL_APIC) += apic.o obj-$(CONFIG_X86_IO_APIC) += io_apic.o mpparse.o obj-$(CONFIG_X86_VISWS_APIC) += visws_apic.o -obj-$(CONFIG_ACPI) += acpi.o include $(TOPDIR)/Rules.make diff --git a/arch/i386/kernel/acpi.c b/arch/i386/kernel/acpi.c deleted file mode 100644 index becbe8840719..000000000000 --- a/arch/i386/kernel/acpi.c +++ /dev/null @@ -1,53 +0,0 @@ -/* - * acpi.c - Linux ACPI arch-specific functions - * - * Copyright (C) 1999-2000 Andrew Henroid - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - */ - -/* - * Changes: - * Arnaldo Carvalho de Melo - 2000/08/31 - * - check copy*user return - * - get rid of check_region - * - get rid of verify_area - * Arnaldo Carvalho de Melo - 2000/09/28 - * - do proper release on failure in acpi_claim_ioports and acpi_init - * Andrew Grover - 2000/11/13 - * - Took out support for user-level interpreter. ACPI 2.0 changes preclude - * its maintenance. 
- */ - -#include -#include -#include -#include - -#define _LINUX -#include -/* Is there a better way to include this? */ -#include <../drivers/acpi/include/acpi.h> - -ACPI_PHYSICAL_ADDRESS -acpi_get_rsdp_ptr() -{ - ACPI_PHYSICAL_ADDRESS rsdp_phys; - - if(ACPI_SUCCESS(acpi_find_root_pointer(&rsdp_phys))) - return rsdp_phys; - else - return 0; -} diff --git a/arch/i386/kernel/setup.c b/arch/i386/kernel/setup.c index fffe11398481..74af012b4055 100644 --- a/arch/i386/kernel/setup.c +++ b/arch/i386/kernel/setup.c @@ -477,7 +477,7 @@ static int __init copy_e820_map(struct e820entry * biosmap, int nr_map) if (start < 0x100000ULL && end > 0xA0000ULL) { if (start < 0xA0000ULL) add_memory_region(start, 0xA0000ULL-start, type); - if (end < 0x100000ULL) + if (end <= 0x100000ULL) continue; start = 0x100000ULL; size = end - start; @@ -518,7 +518,7 @@ void __init setup_memory_region(void) e820.nr_map = 0; add_memory_region(0, LOWMEMSIZE(), E820_RAM); - add_memory_region(HIGH_MEMORY, mem_size << 10, E820_RAM); + add_memory_region(HIGH_MEMORY, (mem_size << 10) - HIGH_MEMORY, E820_RAM); } printk("BIOS-provided physical RAM map:\n"); print_memory_map(who); diff --git a/arch/i386/math-emu/fpu_system.h b/arch/i386/math-emu/fpu_system.h index 3cda85f6515a..1da2e1ff9c26 100644 --- a/arch/i386/math-emu/fpu_system.h +++ b/arch/i386/math-emu/fpu_system.h @@ -20,7 +20,7 @@ of the stack frame of math_emulate() */ #define SETUP_DATA_AREA(arg) FPU_info = (struct info *) &arg -#define LDT_DESCRIPTOR(s) (((struct desc_struct *)current->mm->segments)[(s) >> 3]) +#define LDT_DESCRIPTOR(s) (((struct desc_struct *)current->mm->context.segments)[(s) >> 3]) #define SEG_D_SIZE(x) ((x).b & (3 << 21)) #define SEG_G_BIT(x) ((x).b & (1 << 23)) #define SEG_GRANULARITY(x) (((x).b & (1 << 23)) ? 4096 : 1) diff --git a/arch/m68k/amiga/Makefile b/arch/m68k/amiga/Makefile index b0211f3a79df..a543407a4f27 100644 --- a/arch/m68k/amiga/Makefile +++ b/arch/m68k/amiga/Makefile @@ -8,11 +8,11 @@ # Note 2! 
The CFLAGS definitions are now in the main makefile... O_TARGET := amiga.o -O_OBJS := config.o amiints.o cia.o chipram.o amisound.o -OX_OBJS := amiga_ksyms.o -ifdef CONFIG_AMIGA_PCMCIA -O_OBJS := $(O_OBJS) pcmcia.o -endif +export-objs := amiga_ksyms.o + +obj-y := config.o amiints.o cia.o chipram.o amisound.o amiga_ksyms.o + +obj-$(CONFIG_AMIGA_PCMCIA) += pcmcia.o include $(TOPDIR)/Rules.make diff --git a/arch/m68k/apollo/Makefile b/arch/m68k/apollo/Makefile index ba57e005dd02..bad1bbd50074 100644 --- a/arch/m68k/apollo/Makefile +++ b/arch/m68k/apollo/Makefile @@ -8,7 +8,7 @@ # Note 2! The CFLAGS definitions are now in the main makefile... O_TARGET := apollo.o -O_OBJS := config.o dn_ints.o dma.o \ +obj-y := config.o dn_ints.o dma.o include $(TOPDIR)/Rules.make diff --git a/arch/m68k/atari/Makefile b/arch/m68k/atari/Makefile index bfc82284afa2..363368f5cbca 100644 --- a/arch/m68k/atari/Makefile +++ b/arch/m68k/atari/Makefile @@ -8,14 +8,14 @@ # Note 2! The CFLAGS definitions are now in the main makefile... O_TARGET := atari.o -O_OBJS := config.o time.o debug.o atakeyb.o ataints.o stdma.o atasound.o \ - joystick.o stram.o -OX_OBJS := atari_ksyms.o + +export-objs := atari_ksyms.o + +obj-y := config.o time.o debug.o atakeyb.o ataints.o stdma.o \ + atasound.o joystick.o stram.o atari_ksyms.o ifdef CONFIG_PCI -ifdef CONFIG_HADES -O_OBJS += hades-pci.o -endif +obj-$(CONFIG_HADES) += hades-pci.o endif include $(TOPDIR)/Rules.make diff --git a/arch/m68k/bvme6000/Makefile b/arch/m68k/bvme6000/Makefile index e8670c60b40b..3a2fad598775 100644 --- a/arch/m68k/bvme6000/Makefile +++ b/arch/m68k/bvme6000/Makefile @@ -8,7 +8,7 @@ # Note 2! The CFLAGS definitions are now in the main makefile... 
O_TARGET := bvme6000.o -O_OBJS := config.o bvmeints.o rtc.o -#OX_OBJS = ksyms.o + +obj-y := config.o bvmeints.o rtc.o include $(TOPDIR)/Rules.make diff --git a/arch/m68k/hp300/Makefile b/arch/m68k/hp300/Makefile index b0252c40cd2a..29decf03d3d0 100644 --- a/arch/m68k/hp300/Makefile +++ b/arch/m68k/hp300/Makefile @@ -8,10 +8,11 @@ # Note 2! The CFLAGS definitions are now in the main makefile... O_TARGET := hp300.o -O_OBJS := ksyms.o config.o ints.o time.o reboot.o -ifdef CONFIG_VT -O_OBJS += hil.o -endif +export-objs := ksyms.o + +obj-y := ksyms.o config.o ints.o time.o reboot.o + +obj-$(CONFIG_VT) += hil.o include $(TOPDIR)/Rules.make diff --git a/arch/m68k/kernel/Makefile b/arch/m68k/kernel/Makefile index afd9e4040837..82e7bfc8b497 100644 --- a/arch/m68k/kernel/Makefile +++ b/arch/m68k/kernel/Makefile @@ -17,13 +17,13 @@ all: sun3-head.o kernel.o endif O_TARGET := kernel.o -O_OBJS := entry.o process.o traps.o ints.o signal.o ptrace.o \ - sys_m68k.o time.o semaphore.o -OX_OBJS := setup.o m68k_ksyms.o -ifdef CONFIG_PCI -O_OBJS += bios32.o -endif +export-objs := setup.o m68k_ksyms.o + +obj-y := entry.o process.o traps.o ints.o signal.o ptrace.o \ + sys_m68k.o time.o semaphore.o setup.o m68k_ksyms.o + +obj-$(CONFIG_PCI) += bios32.o head.o: head.S m68k_defs.h diff --git a/arch/m68k/lib/Makefile b/arch/m68k/lib/Makefile index a230bad9ed73..7d2dc0243863 100644 --- a/arch/m68k/lib/Makefile +++ b/arch/m68k/lib/Makefile @@ -6,6 +6,8 @@ $(CC) $(AFLAGS) -traditional -c $< -o $@ L_TARGET = lib.a -L_OBJS = ashrdi3.o lshrdi3.o checksum.o memcpy.o memcmp.o memset.o semaphore.o muldi3.o + +obj-y := ashrdi3.o lshrdi3.o checksum.o memcpy.o memcmp.o memset.o \ + semaphore.o muldi3.o include $(TOPDIR)/Rules.make diff --git a/arch/m68k/mac/Makefile b/arch/m68k/mac/Makefile index f50cd262c9c5..b32afc0629d8 100644 --- a/arch/m68k/mac/Makefile +++ b/arch/m68k/mac/Makefile @@ -8,8 +8,10 @@ # Note 2! The CFLAGS definitions are now in the main makefile... 
O_TARGET := mac.o -OX_OBJS := mac_ksyms.o -O_OBJS := config.o bootparse.o macints.o iop.o via.o oss.o psc.o \ - baboon.o macboing.o debug.o misc.o + +export-objs := mac_ksyms.o + +obj-y := config.o bootparse.o macints.o iop.o via.o oss.o psc.o \ + baboon.o macboing.o debug.o misc.o mac_ksyms.o include $(TOPDIR)/Rules.make diff --git a/arch/m68k/math-emu/Makefile b/arch/m68k/math-emu/Makefile index 78d295d6bb12..c2e505f64a26 100644 --- a/arch/m68k/math-emu/Makefile +++ b/arch/m68k/math-emu/Makefile @@ -13,7 +13,8 @@ #EXTRA_CFLAGS=-DFPU_EMU_DEBUG O_TARGET := mathemu.o -O_OBJS := fp_entry.o fp_scan.o fp_util.o fp_move.o fp_movem.o \ - fp_cond.o fp_arith.o fp_log.o fp_trig.o + +obj-y := fp_entry.o fp_scan.o fp_util.o fp_move.o fp_movem.o \ + fp_cond.o fp_arith.o fp_log.o fp_trig.o include $(TOPDIR)/Rules.make diff --git a/arch/m68k/mm/Makefile b/arch/m68k/mm/Makefile index fdc73c35d261..d0a3c434128b 100644 --- a/arch/m68k/mm/Makefile +++ b/arch/m68k/mm/Makefile @@ -8,12 +8,13 @@ # Note 2! The CFLAGS definition is now in the main makefile... O_TARGET := mm.o -O_OBJS := init.o fault.o extable.o hwtest.o + +obj-y := init.o fault.o extable.o hwtest.o ifndef CONFIG_SUN3 -O_OBJS += kmap.o memory.o motorola.o +obj-y += kmap.o memory.o motorola.o else -O_OBJS += sun3mmu.o +obj-y += sun3mmu.o endif diff --git a/arch/m68k/mvme147/Makefile b/arch/m68k/mvme147/Makefile index c0f064118ffc..d5fe30ce9977 100644 --- a/arch/m68k/mvme147/Makefile +++ b/arch/m68k/mvme147/Makefile @@ -7,8 +7,8 @@ # O_TARGET := mvme147.o -O_OBJS := config.o 147ints.o +obj-y := config.o 147ints.o include $(TOPDIR)/Rules.make diff --git a/arch/m68k/mvme16x/Makefile b/arch/m68k/mvme16x/Makefile index 1c6bfd3ca828..900ec7bbd929 100644 --- a/arch/m68k/mvme16x/Makefile +++ b/arch/m68k/mvme16x/Makefile @@ -8,7 +8,9 @@ # Note 2! The CFLAGS definitions are now in the main makefile... 
O_TARGET := mvme16x.o -O_OBJS := config.o 16xints.o rtc.o -OX_OBJS := mvme16x_ksyms.o + +export-objs := mvme16x_ksyms.o + +obj-y := config.o 16xints.o rtc.o mvme16x_ksyms.o include $(TOPDIR)/Rules.make diff --git a/arch/m68k/q40/Makefile b/arch/m68k/q40/Makefile index 90858884f6b1..fe36baa7c9d0 100644 --- a/arch/m68k/q40/Makefile +++ b/arch/m68k/q40/Makefile @@ -8,7 +8,7 @@ # Note 2! The CFLAGS definitions are now in the main makefile... O_TARGET := q40.o -O_OBJS := config.o q40ints.o +obj-y := config.o q40ints.o include $(TOPDIR)/Rules.make diff --git a/arch/m68k/sun3/Makefile b/arch/m68k/sun3/Makefile index 577667e5c14f..b135eb22ba0e 100644 --- a/arch/m68k/sun3/Makefile +++ b/arch/m68k/sun3/Makefile @@ -11,7 +11,10 @@ $(CC) $(AFLAGS) -traditional -Wa,-m68020 -c $< -o $*.o O_TARGET := sun3.o -O_OBJS := config.o idprom.o mmu_emu.o sun3ints.o leds.o dvma.o sbus.o intersil.o -OX_OBJS := sun3_ksyms.o + +export-objs := sun3_ksyms.o + +obj-y := config.o idprom.o mmu_emu.o sun3ints.o leds.o dvma.o \ + sbus.o intersil.o sun3_ksyms.o include $(TOPDIR)/Rules.make diff --git a/arch/m68k/sun3x/Makefile b/arch/m68k/sun3x/Makefile index ba1f2bbd5359..aa9dadbf5545 100644 --- a/arch/m68k/sun3x/Makefile +++ b/arch/m68k/sun3x/Makefile @@ -8,7 +8,7 @@ # Note 2! The CFLAGS definitions are now in the main makefile... 
O_TARGET := sun3x.o -O_OBJS := config.o time.o dvma.o sbus.o -OX_OBJS := + +obj-y := config.o time.o dvma.o sbus.o include $(TOPDIR)/Rules.make diff --git a/arch/ppc/amiga/Makefile b/arch/ppc/amiga/Makefile index 77f27a8c894a..d1a02038af94 100644 --- a/arch/ppc/amiga/Makefile +++ b/arch/ppc/amiga/Makefile @@ -9,9 +9,11 @@ O_TARGET := amiga.o -obj-y := config.o amiints.o cia.o time.o bootinfo.o amisound.o chipram.o export-objs := amiga_ksyms.o -objs-$(CONFIG_AMIGA_PCMCIA) += pcmia.o +obj-y := config.o amiints.o cia.o time.o bootinfo.o amisound.o \ + chipram.o amiga_ksyms.o + +obj-$(CONFIG_AMIGA_PCMCIA) += pcmia.o include $(TOPDIR)/Rules.make diff --git a/drivers/acpi/Makefile b/drivers/acpi/Makefile index ccac0a12a667..307cb08ad724 100644 --- a/drivers/acpi/Makefile +++ b/drivers/acpi/Makefile @@ -31,7 +31,7 @@ obj-$(CONFIG_ACPI) += os.o ksyms.o ifdef CONFIG_ACPI_KERNEL_CONFIG obj-$(CONFIG_ACPI) += acpiconf.o osconf.o else - obj-$(CONFIG_ACPI) += driver.o cmbatt.o cpu.o ec.o ksyms.o sys.o table.o + obj-$(CONFIG_ACPI) += driver.o cmbatt.o cpu.o ec.o ksyms.o sys.o table.o power.o endif include $(TOPDIR)/Rules.make diff --git a/drivers/acpi/cmbatt.c b/drivers/acpi/cmbatt.c index ce78087430f5..c45aa810c12e 100644 --- a/drivers/acpi/cmbatt.c +++ b/drivers/acpi/cmbatt.c @@ -17,31 +17,58 @@ * along with this program; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ +/* + * Changes: + * Brendan Burns 2000-11-15 + * - added proc battery interface + * - parse returned data from _BST and _BIF + * Andy Grover 2000-12-8 + * - improved proc interface + */ #include #include -#include -#include -#include -#include -#include +#include #include "acpi.h" #include "driver.h" #define _COMPONENT OS_DEPENDENT MODULE_NAME ("cmbatt") +/* ACPI-specific defines */ #define ACPI_CMBATT_HID "PNP0C0A" - #define ACPI_BATT_PRESENT 0x10 +#define ACPI_BATT_UNKNOWN 0xFFFFFFFF + +/* driver-specific defines */ +#define MAX_CM_BATTERIES 0x8 
+#define MAX_BATT_STRLEN 0x20 + +struct cmbatt_info +{ + u32 power_unit; + u32 design_capacity; + u32 last_full_capacity; + u32 battery_technology; + u32 design_voltage; + u32 design_capacity_warning; + u32 design_capacity_low; + u32 battery_capacity_granularity_1; + u32 battery_capacity_granularity_2; -#define ACPI_MAX_BATTERIES 0x8 + char model_number[MAX_BATT_STRLEN]; + char serial_number[MAX_BATT_STRLEN]; + char battery_type[MAX_BATT_STRLEN]; + char oem_info[MAX_BATT_STRLEN]; +}; struct cmbatt_context { - char UID[9]; - u8 is_present; + u32 is_present; ACPI_HANDLE handle; + char UID[9]; + char *power_unit; + struct cmbatt_info info; }; struct cmbatt_status @@ -54,7 +81,98 @@ struct cmbatt_status static u32 batt_count = 0; -static struct cmbatt_context batt_list[ACPI_MAX_BATTERIES]; +static struct cmbatt_context batt_list[MAX_CM_BATTERIES]; + +static ACPI_STATUS +acpi_get_battery_status(ACPI_HANDLE handle, struct cmbatt_status *result) +{ + ACPI_OBJECT *obj; + ACPI_OBJECT *objs; + ACPI_BUFFER buf; + + buf.length = 0; + buf.pointer = NULL; + + /* determine buffer length needed */ + if (acpi_evaluate_object(handle, "_BST", NULL, &buf) != AE_BUFFER_OVERFLOW) { + printk(KERN_ERR "Cmbatt: Could not get battery status struct length\n"); + return AE_NOT_FOUND; + } + + buf.pointer = kmalloc(buf.length, GFP_KERNEL); + if (!buf.pointer) + return AE_NO_MEMORY; + + /* get the data */ + if (!ACPI_SUCCESS(acpi_evaluate_object(handle, "_BST", NULL, &buf))) { + printk(KERN_ERR "Cmbatt: Could not get battery status\n"); + kfree (buf.pointer); + return AE_NOT_FOUND; + } + + obj = (ACPI_OBJECT *) buf.pointer; + objs = obj->package.elements; + + result->state = objs[0].number.value; + result->present_rate = objs[1].number.value; + result->remaining_capacity = objs[2].number.value; + result->present_voltage = objs[3].number.value; + + kfree(buf.pointer); + + return AE_OK; +} + +static ACPI_STATUS +acpi_get_battery_info(ACPI_HANDLE handle, struct cmbatt_info *result) +{ + ACPI_OBJECT 
*obj; + ACPI_OBJECT *objs; + ACPI_BUFFER buf; + + buf.length = 0; + buf.pointer = NULL; + + /* determine the length of the data */ + if (acpi_evaluate_object(handle, "_BIF", NULL, &buf) != AE_BUFFER_OVERFLOW) { + printk(KERN_ERR "Cmbatt: Could not get battery info struct length\n"); + return AE_NOT_FOUND; + } + + buf.pointer = kmalloc(buf.length, GFP_KERNEL); + if (!buf.pointer) + return AE_NO_MEMORY; + + /* get the data */ + if (!ACPI_SUCCESS(acpi_evaluate_object(handle, "_BIF", NULL, &buf))) { + printk(KERN_ERR "Cmbatt: Could not get battery info\n"); + kfree (buf.pointer); + return AE_NOT_FOUND; + } + + obj = (ACPI_OBJECT *) buf.pointer; + objs = obj->package.elements; + + result->power_unit=objs[0].number.value; + result->design_capacity=objs[1].number.value; + result->last_full_capacity=objs[2].number.value; + result->battery_technology=objs[3].number.value; + result->design_voltage=objs[4].number.value; + result->design_capacity_warning=objs[5].number.value; + result->design_capacity_low=objs[6].number.value; + result->battery_capacity_granularity_1=objs[7].number.value; + result->battery_capacity_granularity_2=objs[8].number.value; + + /* BUG: trailing NULL issue */ + strncpy(result->model_number, objs[9].string.pointer, MAX_BATT_STRLEN-1); + strncpy(result->serial_number, objs[10].string.pointer, MAX_BATT_STRLEN-1); + strncpy(result->battery_type, objs[11].string.pointer, MAX_BATT_STRLEN-1); + strncpy(result->oem_info, objs[12].string.pointer, MAX_BATT_STRLEN-1); + + kfree(buf.pointer); + + return AE_OK; +} /* * We found a device with the correct HID @@ -63,9 +181,14 @@ static ACPI_STATUS acpi_found_cmbatt(ACPI_HANDLE handle, u32 level, void *ctx, void **value) { ACPI_DEVICE_INFO info; - + + if (batt_count >= MAX_CM_BATTERIES) { + printk(KERN_ERR "Cmbatt: MAX_CM_BATTERIES exceeded\n"); + return AE_OK; + } + if (!ACPI_SUCCESS(acpi_get_object_info(handle, &info))) { - printk(KERN_ERR "Could not get battery object info\n"); + printk(KERN_ERR "Cmbatt: Could not 
get battery object info\n"); return (AE_OK); } @@ -73,69 +196,207 @@ acpi_found_cmbatt(ACPI_HANDLE handle, u32 level, void *ctx, void **value) strncpy(batt_list[batt_count].UID, info.unique_id, 9); } else if (batt_count > 1) { - printk(KERN_WARNING "ACPI: No UID but more than 1 battery\n"); + printk(KERN_WARNING "Cmbatt: No UID but more than 1 battery\n"); + } + + if (!(info.valid & ACPI_VALID_STA)) { + printk(KERN_ERR "Cmbatt: Battery _STA invalid\n"); + return AE_OK; } - if ((info.valid & ACPI_VALID_STA) && - (info.current_status & ACPI_BATT_PRESENT)) { + if (!(info.current_status & ACPI_BATT_PRESENT)) { + printk(KERN_INFO "Cmbatt: Battery socket %d empty\n", batt_count); + batt_list[batt_count].is_present = FALSE; + } + else { + printk(KERN_INFO "Cmbatt: Battery socket %d occupied\n", batt_count); + batt_list[batt_count].is_present = TRUE; + if (acpi_get_battery_info(handle, &batt_list[batt_count].info) != AE_OK) { + printk(KERN_ERR "acpi_get_battery_info failed\n"); + return AE_OK; + } - ACPI_BUFFER buf; + batt_list[batt_count].power_unit = (batt_list[batt_count].info.power_unit) ? 
"mA" : "mW"; + } + + batt_list[batt_count].handle = handle; - printk("ACPI: Found a battery\n"); - batt_list[batt_count].is_present = TRUE; + batt_count++; - buf.length = 0; - buf.pointer = NULL; + return AE_OK; +} - /* determine buffer length needed */ - if (acpi_evaluate_object(handle, "_BST", NULL, &buf) != AE_BUFFER_OVERFLOW) - return AE_OK; +static int +proc_read_batt_info(char *page, char **start, off_t off, + int count, int *eof, void *data) +{ + struct cmbatt_info *info; + u32 batt_num = (u32) data; + char *p = page; + int len; - buf.pointer = kmalloc(buf.length, GFP_KERNEL); - - if (!buf.pointer) - return AE_NO_MEMORY; + info = &batt_list[batt_num].info; - /* get the data */ - if (!ACPI_SUCCESS(acpi_evaluate_object(handle, "_BST", NULL, &buf))) { - printk(KERN_ERR "Could not get battery status\n"); - kfree (buf.pointer); - return AE_OK; - } + /* don't get info more than once for a single proc read */ + if (off != 0) + goto end; - kfree(buf.pointer); + if (!batt_list[batt_num].is_present) { + p += sprintf(p, "battery %d not present\n", batt_num); + goto end; + } + + if (info->last_full_capacity == ACPI_BATT_UNKNOWN) + p += sprintf(p, "Unknown last full capacity\n"); + else + p += sprintf(p, "Last Full Capacity %x %s /hr\n", + info->last_full_capacity, batt_list[batt_num].power_unit); + + if (info->design_capacity == ACPI_BATT_UNKNOWN) + p += sprintf(p, "Unknown Design Capacity\n"); + else + p += sprintf(p, "Design Capacity %x %s /hr\n", + info->design_capacity, batt_list[batt_num].power_unit); + + if (info->battery_technology) + p += sprintf(p, "Secondary Battery Technology\n"); + else + p += sprintf(p, "Primary Battery Technology\n"); + + if (info->design_voltage == ACPI_BATT_UNKNOWN) + p += sprintf(p, "Unknown Design Voltage\n"); + else + p += sprintf(p, "Design Voltage %x mV\n", + info->design_voltage); + + p += sprintf(p, "Design Capacity Warning %d\n", + info->design_capacity_warning); + p += sprintf(p, "Design Capacity Low %d\n", + 
info->design_capacity_low); + p += sprintf(p, "Battery Capacity Granularity 1 %d\n", + info->battery_capacity_granularity_1); + p += sprintf(p, "Battery Capacity Granularity 2 %d\n", + info->battery_capacity_granularity_2); + p += sprintf(p, "model number %s\nserial number %s\nbattery type %s\nOEM info %s\n", + info->model_number,info->serial_number, + info->battery_type,info->oem_info); +end: + len = (p - page); + if (len <= off+count) *eof = 1; + *start = page + off; + len -= off; + if (len>count) len = count; + if (len<0) len = 0; + return len; +} + +static int +proc_read_batt_status(char *page, char **start, off_t off, + int count, int *eof, void *data) +{ + struct cmbatt_status status; + u32 batt_num = (u32) data; + char *p = page; + int len; - /* TODO: parse the battery data */ - /* TODO: add proc interface */ + /* don't get status more than once for a single proc read */ + if (off != 0) + goto end; + + if (!batt_list[batt_num].is_present) { + p += sprintf(p, "battery %d not present\n", batt_num); + goto end; } - else { - printk("ACPI: Found an empty battery socket\n"); - batt_list[batt_count].is_present = FALSE; + + printk("getting batt status\n"); + + if (acpi_get_battery_status(batt_list[batt_num].handle, &status) != AE_OK) { + printk(KERN_ERR "Cmbatt: acpi_get_battery_status failed\n"); + goto end; } - batt_list[batt_count].handle = handle; + p += sprintf(p, "Remaining Capacity: %x\n", status.remaining_capacity); - batt_count++; + if (status.state & 0x1) + p += sprintf(p, "Battery discharging\n"); + if (status.state & 0x2) + p += sprintf(p, "Battery charging\n"); + if (status.state & 0x4) + p += sprintf(p, "Battery critically low\n"); + + if (status.present_rate == ACPI_BATT_UNKNOWN) + p += sprintf(p, "Battery rate unknown\n"); + else + p += sprintf(p, "Battery rate %x\n", + status.present_rate); + + if (status.remaining_capacity == ACPI_BATT_UNKNOWN) + p += sprintf(p, "Battery capacity unknown\n"); + else + p += sprintf(p, "Battery capacity %x %s\n", + 
status.remaining_capacity, batt_list[batt_num].power_unit); - return (AE_OK); + if (status.present_voltage == ACPI_BATT_UNKNOWN) + p += sprintf(p, "Battery voltage unknown\n"); + else + p += sprintf(p, "Battery voltage %x volts\n", + status.present_voltage); + +end: + + len = (p - page); + if (len <= off+count) *eof = 1; + *start = page + off; + len -= off; + if (len>count) len = count; + if (len<0) len = 0; + return len; } + + int acpi_cmbatt_init(void) { + int i; + acpi_get_devices(ACPI_CMBATT_HID, acpi_found_cmbatt, NULL, NULL); + for (i = 0; i < batt_count; i++) { + + char batt_name[20]; + + sprintf(batt_name, "power/batt%d_info", i); + create_proc_read_entry(batt_name, 0, NULL, + proc_read_batt_info, (void *) i); + + sprintf(batt_name, "power/batt%d_status", i); + create_proc_read_entry(batt_name, 0, NULL, + proc_read_batt_status, (void *) i); + + } + return 0; } int acpi_cmbatt_terminate(void) { - /* TODO */ - /* walk list of batteries */ - /* free their context and release resources */ + int i; + + for (i = 0; i < batt_count; i++) { + + char batt_name[20]; + + sprintf(batt_name, "power/batt%d_info", i); + remove_proc_entry(batt_name, NULL); + + sprintf(batt_name, "power/batt%d_status", i); + remove_proc_entry(batt_name, NULL); + } + return 0; } diff --git a/drivers/acpi/common/cmcopy.c b/drivers/acpi/common/cmcopy.c index 293c728905d6..68b7bda0152f 100644 --- a/drivers/acpi/common/cmcopy.c +++ b/drivers/acpi/common/cmcopy.c @@ -1,7 +1,7 @@ /****************************************************************************** * * Module Name: cmcopy - Internal to external object translation utilities - * $Revision: 59 $ + * $Revision: 61 $ * *****************************************************************************/ @@ -102,7 +102,7 @@ acpi_cm_build_external_simple_object ( case ACPI_TYPE_STRING: - length = internal_obj->string.length; + length = internal_obj->string.length + 1; external_obj->string.length = internal_obj->string.length; 
external_obj->string.pointer = (NATIVE_CHAR *) data_space; source_ptr = (u8 *) internal_obj->string.pointer; diff --git a/drivers/acpi/common/cminit.c b/drivers/acpi/common/cminit.c index babf941d2f77..e6cfb7655d4b 100644 --- a/drivers/acpi/common/cminit.c +++ b/drivers/acpi/common/cminit.c @@ -1,7 +1,7 @@ /****************************************************************************** * * Module Name: cminit - Common ACPI subsystem initialization - * $Revision: 89 $ + * $Revision: 91 $ * *****************************************************************************/ @@ -53,7 +53,7 @@ static ACPI_STATUS acpi_cm_fadt_register_error ( NATIVE_CHAR *register_name, - UINT64 value) + u32 value) { REPORT_ERROR ( @@ -96,25 +96,25 @@ acpi_cm_validate_fadt ( if (!acpi_gbl_FADT->pm1_cnt_len) { status = acpi_cm_fadt_register_error ("PM1_CNT_LEN", - (u32) acpi_gbl_FADT->pm1_cnt_len); + 0); } - if (!acpi_gbl_FADT->Xpm1a_evt_blk.address) { + if (!ACPI_VALID_ADDRESS (acpi_gbl_FADT->Xpm1a_evt_blk.address)) { status = acpi_cm_fadt_register_error ("PM1a_EVT_BLK", - acpi_gbl_FADT->Xpm1a_evt_blk.address); + 0); } - if (!acpi_gbl_FADT->Xpm1a_cnt_blk.address) { + if (!ACPI_VALID_ADDRESS (acpi_gbl_FADT->Xpm1a_cnt_blk.address)) { status = acpi_cm_fadt_register_error ("PM1a_CNT_BLK", - acpi_gbl_FADT->Xpm1a_cnt_blk.address); + 0); } - if (!acpi_gbl_FADT->Xpm_tmr_blk.address) { + if (!ACPI_VALID_ADDRESS (acpi_gbl_FADT->Xpm_tmr_blk.address)) { status = acpi_cm_fadt_register_error ("PM_TMR_BLK", - acpi_gbl_FADT->Xpm_tmr_blk.address); + 0); } - if ((acpi_gbl_FADT->Xpm2_cnt_blk.address && + if ((ACPI_VALID_ADDRESS (acpi_gbl_FADT->Xpm2_cnt_blk.address) && !acpi_gbl_FADT->pm2_cnt_len)) { status = acpi_cm_fadt_register_error ("PM2_CNT_LEN", @@ -129,14 +129,14 @@ acpi_cm_validate_fadt ( /* length of GPE blocks must be a multiple of 2 */ - if (acpi_gbl_FADT->Xgpe0blk.address && + if (ACPI_VALID_ADDRESS (acpi_gbl_FADT->Xgpe0blk.address) && (acpi_gbl_FADT->gpe0blk_len & 1)) { status = 
acpi_cm_fadt_register_error ("GPE0_BLK_LEN", (u32) acpi_gbl_FADT->gpe0blk_len); } - if (acpi_gbl_FADT->Xgpe1_blk.address && + if (ACPI_VALID_ADDRESS (acpi_gbl_FADT->Xgpe1_blk.address) && (acpi_gbl_FADT->gpe1_blk_len & 1)) { status = acpi_cm_fadt_register_error ("GPE1_BLK_LEN", diff --git a/drivers/acpi/common/cmobject.c b/drivers/acpi/common/cmobject.c index 1cce4a5524ef..95e70fb14b0b 100644 --- a/drivers/acpi/common/cmobject.c +++ b/drivers/acpi/common/cmobject.c @@ -1,7 +1,7 @@ /****************************************************************************** * * Module Name: cmobject - ACPI object create/delete/size/cache routines - * $Revision: 32 $ + * $Revision: 34 $ * *****************************************************************************/ @@ -424,7 +424,7 @@ acpi_cm_get_simple_object_size ( case ACPI_TYPE_STRING: - length += internal_obj->string.length; + length += internal_obj->string.length + 1; break; diff --git a/drivers/acpi/dispatcher/dswstate.c b/drivers/acpi/dispatcher/dswstate.c index cac9f24caa07..a15a6f5f7eb2 100644 --- a/drivers/acpi/dispatcher/dswstate.c +++ b/drivers/acpi/dispatcher/dswstate.c @@ -1,7 +1,7 @@ /****************************************************************************** * * Module Name: dswstate - Dispatcher parse tree walk management routines - * $Revision: 35 $ + * $Revision: 36 $ * *****************************************************************************/ @@ -253,7 +253,7 @@ acpi_ds_result_pop_from_bottom ( return (AE_AML_NO_OPERAND); } - + return (AE_OK); } diff --git a/drivers/acpi/driver.c b/drivers/acpi/driver.c index 7173cb817d0b..222598120b6c 100644 --- a/drivers/acpi/driver.c +++ b/drivers/acpi/driver.c @@ -17,6 +17,11 @@ * along with this program; if not, write to the Free Software * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ +/* + * Changes + * David Woodhouse 2000-12-6 + * - Fix interruptible_sleep_on() races + */ #include #include @@ -32,6 +37,12 @@ #include "acpi.h" 
#include "driver.h" +#ifdef CONFIG_ACPI_KERNEL_CONFIG +#include +#define ACPI_CAN_USE_EFI_STRUCT +#endif + + #define _COMPONENT OS_DEPENDENT MODULE_NAME ("driver") @@ -193,8 +204,12 @@ acpi_do_event(ctl_table * ctl, return 0; } - for (;;) { + while (!event_status) { unsigned long flags; + DECLARE_WAITQUEUE(wait, current); + + set_current_state(TASK_INTERRUPTIBLE); + add_wait_queue(&acpi_event_wait, &wait); // we need an atomic exchange here spin_lock_irqsave(&acpi_event_lock, flags); @@ -203,11 +218,12 @@ acpi_do_event(ctl_table * ctl, spin_unlock_irqrestore(&acpi_event_lock, flags); event_state = acpi_event_state; - if (event_status) - break; + if (!event_status) + schedule(); + + remove_wait_queue(&acpi_event_wait, &wait); + set_current_state(TASK_RUNNING); - // wait for an event to arrive - interruptible_sleep_on(&acpi_event_wait); if (signal_pending(current)) return -ERESTARTSYS; } @@ -412,12 +428,14 @@ acpi_thread(void *context) return -ENODEV; } - /* arch-specific call to get rsdp ptr */ - rsdp_phys = acpi_get_rsdp_ptr(); - if (!rsdp_phys) { +#ifndef ACPI_CAN_USE_EFI_STRUCT + if (!ACPI_SUCCESS(acpi_find_root_pointer(&rsdp_phys))) { printk(KERN_ERR "ACPI: System description tables not found\n"); return -ENODEV; } +#else + rsdp_phys = efi.acpi; +#endif printk(KERN_ERR "ACPI: System description tables found\n"); @@ -443,7 +461,7 @@ acpi_thread(void *context) acpi_cpu_init(); acpi_sys_init(); acpi_ec_init(); - acpi_cmbatt_init(); + acpi_power_init(); /* * Non-intuitive: 0 means pwr and sleep are implemented using the fixed @@ -474,9 +492,20 @@ acpi_thread(void *context) * run */ for (;;) { - interruptible_sleep_on(&acpi_thread_wait); + DECLARE_WAITQUEUE(wait, current); + + set_current_state(TASK_INTERRUPTIBLE); + add_wait_queue(&acpi_thread_wait, &wait); + + if (list_empty(&acpi_thread_run)) + schedule(); + + remove_wait_queue(&acpi_thread_wait, &wait); + set_current_state(TASK_RUNNING); + if (signal_pending(current)) break; + run_task_queue(&acpi_thread_run); } 
diff --git a/drivers/acpi/driver.h b/drivers/acpi/driver.h index ea99f4d4814d..9abae5d56eb7 100644 --- a/drivers/acpi/driver.h +++ b/drivers/acpi/driver.h @@ -53,9 +53,9 @@ int acpi_run(void (*callback)(void*), void *context); int acpi_ec_init(void); /* - * cmbatt.c + * power.c */ -int acpi_cmbatt_init(void); +int acpi_power_init(void); /* * sys.c diff --git a/drivers/acpi/ec.c b/drivers/acpi/ec.c index 8bf7c0c687fb..8f6f61e36dc1 100644 --- a/drivers/acpi/ec.c +++ b/drivers/acpi/ec.c @@ -19,14 +19,11 @@ */ #include -#include -#include -#include #include -#include -#include +#include #include "acpi.h" #include "driver.h" +#include "ec.h" #define _COMPONENT OS_DEPENDENT MODULE_NAME ("ec") @@ -52,97 +49,369 @@ enum ACPI_EC_QUERY = 0x84, }; -struct ec_context +typedef struct { + ACPI_HANDLE acpi_handle; u32 gpe_bit; ACPI_IO_ADDRESS status_port; ACPI_IO_ADDRESS data_port; u32 need_global_lock; -}; +} ec_context_t; -static DECLARE_WAIT_QUEUE_HEAD(acpi_ec_wait); +typedef struct +{ + ec_context_t *ec; + u8 data; -/* - * handle GPE - */ -static void -acpi_ec_gpe(void *context) +} EC_QUERY_DATA; + +static char object_name[] = {'_', 'Q', '0', '0', '\0'}; + +static char hex[] = {'0','1','2','3','4','5','6','7','8','9','A','B','C','D','E','F'}; + + +static ACPI_STATUS +ec_io_wait ( + ec_context_t *ec, + EC_EVENT wait_event) { - printk(KERN_INFO "ACPI: EC GPE\n"); - /* TODO fix this to use per-device sem */ - if (waitqueue_active(&acpi_ec_wait)) - wake_up_interruptible(&acpi_ec_wait); + EC_STATUS ec_status = 0; + UINT32 i = 100; + + if (!ec || ((wait_event != EC_EVENT_OUTPUT_BUFFER_FULL) + && (wait_event != EC_EVENT_INPUT_BUFFER_EMPTY))) + return(AE_BAD_PARAMETER); + + /* + * Wait for Event: + * --------------- + * Poll the EC status register waiting for the event to occur. + * Note that we'll wait a maximum of 1ms in 10us chunks. 
+ */ + switch (wait_event) { + case EC_EVENT_OUTPUT_BUFFER_FULL: + do { + ec_status = acpi_os_in8(ec->status_port); + if (ec_status & EC_FLAG_OUTPUT_BUFFER) + return(AE_OK); + acpi_os_sleep_usec(10); + } while (--i>0); + break; + case EC_EVENT_INPUT_BUFFER_EMPTY: + do { + ec_status = acpi_os_in8(ec->status_port); + if (!(ec_status & EC_FLAG_INPUT_BUFFER)) + return(AE_OK); + acpi_os_sleep_usec(10); + } while (--i>0); + break; + } + + return(AE_TIME); } -/* - * wait for read/write status to clear - */ -static void -acpi_ec_wait_control(struct ec_context *ec_cxt) +static ACPI_STATUS +ec_io_read ( + ec_context_t *ec, + ACPI_IO_ADDRESS io_port, + UINT8 *data, + EC_EVENT wait_event) { - udelay(1); - while(inb(ec_cxt->status_port) & ACPI_EC_IBF) - udelay(10); + ACPI_STATUS status = AE_OK; + + if (!ec || !data) + return(AE_BAD_PARAMETER); + + *data = acpi_os_in8(io_port); + + if (wait_event) + status = ec_io_wait(ec, wait_event); + + return(status); } -/* - * read a byte from the EC - */ -int -acpi_ec_read(struct ec_context *ec_cxt, - int addr, - int *value) +static ACPI_STATUS +ec_io_write ( + ec_context_t *ec, + ACPI_IO_ADDRESS io_port, + UINT8 data, + EC_EVENT wait_event) { - if (!ec_cxt->data_port || !ec_cxt->status_port) - return -1; + ACPI_STATUS status = AE_OK; - if (ec_cxt->need_global_lock) - acpi_acquire_global_lock(); + if (!ec) + return(AE_BAD_PARAMETER); - outb(ACPI_EC_READ, ec_cxt->status_port); - acpi_ec_wait_control(ec_cxt); - outb(addr, ec_cxt->data_port); - acpi_ec_wait_control(ec_cxt); - /*interruptible_sleep_on(&acpi_ec_wait);*/ - *value = inb(ec_cxt->data_port); + acpi_os_out8(io_port, data); - if (ec_cxt->need_global_lock) - acpi_release_global_lock(); + if (wait_event) + status = ec_io_wait(ec, wait_event); - return 0; + return(status); } -/* - * write a byte to the EC - */ -int -acpi_ec_write(struct ec_context *ec_cxt, - int addr, - int value) +static ACPI_STATUS +ec_read ( + ec_context_t *ec, + UINT8 address, + UINT8 *data) { - if 
(!ec_cxt->data_port || !ec_cxt->status_port) - return -1; + ACPI_STATUS status = AE_OK; - if (ec_cxt->need_global_lock) - acpi_acquire_global_lock(); + FUNCTION_TRACE("ec_read"); - outb(ACPI_EC_WRITE, ec_cxt->status_port); - acpi_ec_wait_control(ec_cxt); - outb(addr, ec_cxt->data_port); - acpi_ec_wait_control(ec_cxt); - outb(value, ec_cxt->data_port); - acpi_ec_wait_control(ec_cxt); - /*interruptible_sleep_on(&acpi_ec_wait);*/ + if (!ec || !data) + return_ACPI_STATUS(AE_BAD_PARAMETER); - if (ec_cxt->need_global_lock) - acpi_release_global_lock(); + status = ec_io_write(ec, ec->status_port, EC_COMMAND_READ, EC_EVENT_INPUT_BUFFER_EMPTY); + if (ACPI_FAILURE(status)) { + DEBUG_PRINT(ACPI_WARN, ("Unable to send 'read command' to EC.\n")); + return_ACPI_STATUS(status); + } - return 0; + status = ec_io_write(ec, ec->data_port, address, EC_EVENT_OUTPUT_BUFFER_FULL); + if (ACPI_FAILURE(status)) { + DEBUG_PRINT(ACPI_WARN, ("Unable to send 'read address' to EC.\n")); + return_ACPI_STATUS(status); + } + + status = ec_io_read(ec, ec->data_port, data, EC_EVENT_NONE); + + DEBUG_PRINT(ACPI_INFO, ("Read data[0x%02x] from address[0x%02x] on ec.\n", (*data), address)); + + return_ACPI_STATUS(status); +} + +static ACPI_STATUS +ec_write ( + ec_context_t *ec, + UINT8 address, + UINT8 data) +{ + ACPI_STATUS status = AE_OK; + + FUNCTION_TRACE("ec_write"); + + if (!ec) + return_ACPI_STATUS(AE_BAD_PARAMETER); + + status = ec_io_write(ec, ec->status_port, EC_COMMAND_WRITE, EC_EVENT_INPUT_BUFFER_EMPTY); + if (ACPI_FAILURE(status)) { + DEBUG_PRINT(ACPI_WARN, ("Unable to send 'write command' to EC.\n")); + return_ACPI_STATUS(status); + } + + status = ec_io_write(ec, ec->data_port, address, EC_EVENT_INPUT_BUFFER_EMPTY); + if (ACPI_FAILURE(status)) { + DEBUG_PRINT(ACPI_WARN, ("Unable to send 'write address' to EC.\n")); + return_ACPI_STATUS(status); + } + + status = ec_io_write(ec, ec->data_port, data, EC_EVENT_INPUT_BUFFER_EMPTY); + if (ACPI_FAILURE(status)) { + DEBUG_PRINT(ACPI_WARN, ("Unable 
to send 'write data' to EC.\n")); + return_ACPI_STATUS(status); + } + + DEBUG_PRINT(ACPI_INFO, ("Wrote data[0x%02x] to address[0x%02x] on ec.\n", data, address)); + + return_ACPI_STATUS(status); } static ACPI_STATUS -acpi_ec_region_setup ( +ec_transaction ( + ec_context_t *ec, + EC_REQUEST *request) +{ + ACPI_STATUS status = AE_OK; + + FUNCTION_TRACE("ec_transaction"); + + if (!ec || !request) + return_ACPI_STATUS(AE_BAD_PARAMETER); + + /* + * Obtaining semaphore (mutex) to serialize all EC transactions. + */ + /* + DEBUG_PRINT(ACPI_INFO, ("Calling acpi_os_wait_semaphore(%p, 1, %d)\n", ec->mutex, EC_DEFAULT_TIMEOUT)); + status = acpi_os_wait_semaphore(ec->mutex, 1, EC_DEFAULT_TIMEOUT); + if (ACPI_FAILURE(status)) + return_ACPI_STATUS(status); + */ + + /* + * Perform the transaction. + */ + switch (request->command) { + + case EC_COMMAND_READ: + status = ec_read(ec, request->address, &(request->data)); + break; + + case EC_COMMAND_WRITE: + status = ec_write(ec, request->address, request->data); + break; + + default: + status = AE_SUPPORT; + break; + } + + /* + * Signal the semaphore (mutex) to indicate transaction completion. + */ + /* + DEBUG_PRINT(ACPI_INFO, ("Calling acpi_os_signal_semaphore(%p, 1)\n", ec->mutex)); + acpi_os_signal_semaphore(ec->mutex, 1); + */ + + return_ACPI_STATUS(status); +} + +static ACPI_STATUS +ec_space_setup ( + ACPI_HANDLE region_handle, + UINT32 function, + void *handler_context, + void **return_context) +{ + // TODO: What is this function for? + /* + * The ec object is in the handler context and is needed + * when calling the ec_space_handler. 
+ */ + *return_context = handler_context; + + return AE_OK; +} + + + + +static void +ec_query_handler ( + void *context) +{ + ACPI_STATUS status = AE_OK; + EC_QUERY_DATA *ec_q = (EC_QUERY_DATA*)context; + + FUNCTION_TRACE("ec_query_handler"); + + if (!ec_q || !ec_q->ec) { + DEBUG_PRINT(ACPI_ERROR, ("Invalid (NULL) context.\n")); + return_VOID; + } + + /* + * Evaluate _Qxx: + * -------------- + * Evaluate corresponding _Qxx method. Note that a zero query + * value indicates a spurious EC_SCI (no such thing as _Q00). + */ + object_name[2] = hex[((ec_q->data >> 4) & 0x0F)]; + object_name[3] = hex[(ec_q->data & 0x0F)]; + + DEBUG_PRINT(ACPI_INFO, ("Read query data[0x%02x] from ec - evaluating [%s].\n", ec_q->data, object_name)); + + status = acpi_evaluate_object(ec_q->ec->acpi_handle, object_name, NULL, NULL); + + kfree(ec_q); + + return_VOID; +} + +/* + * handle GPE + */ +static void +ec_gpe_handler(void *context) +{ + ACPI_STATUS status = AE_OK; + ec_context_t *ec = (ec_context_t *) context; + EC_QUERY_DATA *ec_q = NULL; + EC_STATUS ec_status = 0; + + FUNCTION_TRACE("ec_gpe_handler"); + + if (!ec) { + DEBUG_PRINT(ACPI_INFO, ("Invalid (NULL) context.\n")); + return_VOID; + } + + // GET SPINLOCK! + + /* + * EC_SCI? + * ------- + * Check the EC_SCI bit to see if this is an EC_SCI event. If not (e.g. + * OBF/IBE) just return, as we already poll to detect these events. + */ + ec_status = acpi_os_in8(ec->status_port); + DEBUG_PRINT(ACPI_INFO, ("EC Status Register: [0x%02x]\n", ec_status)); + if (!(ec_status & EC_FLAG_SCI)) + return_VOID; + + DEBUG_PRINT(ACPI_INFO, ("EC_SCI detected - running QUERY.\n")); + + // TODO: Need GFP_ATOMIC 'switch' for OSL interface... + ec_q = kmalloc(sizeof(EC_QUERY_DATA), GFP_ATOMIC); + if (!ec_q) { + DEBUG_PRINT(ACPI_INFO, ("Memory allocation failure.\n")); + return_VOID; + } + + ec_q->ec = ec; + ec_q->data = 0; + + /* + * Run Query: + * ---------- + * Query the EC to find out which _Qxx method we need to evaluate. 
+ * Note that successful completion of the query causes the EC_SCI + * bit to be cleared (and thus clearing the interrupt source). + */ + status = ec_io_write(ec, ec->status_port, EC_COMMAND_QUERY, EC_EVENT_OUTPUT_BUFFER_FULL); + if (ACPI_FAILURE(status)) { + DEBUG_PRINT(ACPI_WARN, ("Unable to send 'query command' to EC.\n")); + goto End; + } + + status = ec_io_read(ec, ec->data_port, &(ec_q->data), EC_EVENT_NONE); + if (ACPI_FAILURE(status)) { + DEBUG_PRINT(ACPI_WARN, ("Error reading query data.\n")); + goto End; + } + + // RELEASE SPINLOCK! + + if (!ec_q->data) { + DEBUG_PRINT(ACPI_WARN, ("Spurious EC SCI detected.\n")); + status = AE_ERROR; + goto End; + } + + /* + * Defer _Qxx Execution: + * --------------------- + * Can't evaluate this method now 'cause we're at interrupt-level. + */ + status = acpi_os_queue_for_execution(OSD_PRIORITY_GPE, ec_query_handler, ec_q); + if (ACPI_FAILURE(status)) { + DEBUG_PRINT(ACPI_ERROR, ("Unable to defer _Qxx method evaluation.\n")); + goto End; + } + +End: + if (ACPI_FAILURE(status)) + kfree(ec_q); + + return_VOID; +} + +static ACPI_STATUS +ec_region_setup ( ACPI_HANDLE handle, u32 function, void *handler_context, @@ -168,44 +437,90 @@ acpi_ec_region_setup ( return_ACPI_STATUS (AE_OK); } +/***************************************************************************** + * + * FUNCTION: ec_region_handler + * + * PARAMETERS: function - Read or Write operation + * address - Where in the space to read or write + * bit_width - Field width in bits (8, 16, or 32) + * value - Pointer to in or out value + * context - context pointer + * + * RETURN: + * + * DESCRIPTION: Handler for the Embedded Controller (EC) address space + * (Op Region) + * + ****************************************************************************/ + static ACPI_STATUS -acpi_ec_region_handler (u32 function, - ACPI_PHYSICAL_ADDRESS address, - u32 bitwidth, - u32 *value, - void *handler_context, - void *region_context) +ec_region_handler ( + UINT32 function, + 
ACPI_PHYSICAL_ADDRESS address, + UINT32 bit_width, + UINT32 *value, + void *handler_context, + void *region_context) { - struct ec_context *ec_cxt; + ACPI_STATUS status = AE_OK; + ec_context_t *ec = NULL; + EC_REQUEST ec_request; - FUNCTION_TRACE("acpi_ec_region_handler"); + FUNCTION_TRACE("ec_space_handler"); - ec_cxt = handler_context; + if (address > 0xFF || bit_width != 8 || !value || !handler_context) + return_ACPI_STATUS(AE_BAD_PARAMETER); - if (function == ADDRESS_SPACE_READ) { - *value = 0; - acpi_ec_read(ec_cxt, address, value); - /*printk("EC read %x from %x\n", *value, address);*/ - } - else { - acpi_ec_write(ec_cxt, address, *value); - /*printk("EC write value %x to %x\n", *value, address);*/ - } - - return_ACPI_STATUS (AE_OK); + ec = (ec_context_t*)handler_context; + + switch (function) { + + case ADDRESS_SPACE_READ: + ec_request.command = EC_COMMAND_READ; + ec_request.address = address; + ec_request.data = 0; + break; + + case ADDRESS_SPACE_WRITE: + ec_request.command = EC_COMMAND_WRITE; + ec_request.address = address; + ec_request.data = (UINT8)(*value); + break; + + default: + DEBUG_PRINT(ACPI_WARN, ("Received request with invalid function [0x%08X].\n", function)); + return_ACPI_STATUS(AE_BAD_PARAMETER); + break; + } + + DEBUG_PRINT(ACPI_INFO, ("device[ec] command[0x%02X] address[0x%02X] data[0x%02X]\n", ec_request.command, ec_request.address, ec_request.data)); + + /* + * Perform the Transaction. 
+ */ + status = ec_transaction(ec, &ec_request); + if (ACPI_SUCCESS(status)) + (*value) = (UINT32)ec_request.data; + + return_ACPI_STATUS(status); } /* * Get Embedded Controller information */ static ACPI_STATUS -acpi_found_ec(ACPI_HANDLE handle, u32 level, void *ctx, void **value) +found_ec( + ACPI_HANDLE handle, + u32 level, + void *ctx, + void **value) { ACPI_STATUS status; ACPI_OBJECT obj; ACPI_BUFFER buf; RESOURCE *res; - struct ec_context *ec_cxt; + ec_context_t *ec_cxt; buf.length = 0; buf.pointer = NULL; @@ -221,12 +536,14 @@ acpi_found_ec(ACPI_HANDLE handle, u32 level, void *ctx, void **value) return AE_OK; } - ec_cxt = kmalloc(sizeof(struct ec_context), GFP_KERNEL); + ec_cxt = kmalloc(sizeof(ec_context_t), GFP_KERNEL); if (!ec_cxt) { kfree(buf.pointer); return AE_NO_MEMORY; } + ec_cxt->acpi_handle = handle; + res = (RESOURCE*) buf.pointer; ec_cxt->data_port = res->data.io.min_base_address; res = NEXT_RESOURCE(res); @@ -264,17 +581,16 @@ acpi_found_ec(ACPI_HANDLE handle, u32 level, void *ctx, void **value) if (!ACPI_SUCCESS(acpi_install_gpe_handler( ec_cxt->gpe_bit, - (ACPI_EVENT_LEVEL_TRIGGERED - | ACPI_EVENT_EDGE_TRIGGERED), - acpi_ec_gpe, - NULL))) { + ACPI_EVENT_EDGE_TRIGGERED, + ec_gpe_handler, + ec_cxt))) { REPORT_ERROR(("Could not install GPE handler for EC.\n")); return AE_OK; } status = acpi_install_address_space_handler (handle, ADDRESS_SPACE_EC, - acpi_ec_region_handler, acpi_ec_region_setup, ec_cxt); + ec_region_handler, ec_region_setup, ec_cxt); if (!ACPI_SUCCESS(status)) { REPORT_ERROR(("Could not install EC address " @@ -288,7 +604,7 @@ int acpi_ec_init(void) { acpi_get_devices(ACPI_EC_HID, - acpi_found_ec, + found_ec, NULL, NULL); diff --git a/drivers/acpi/ec.h b/drivers/acpi/ec.h new file mode 100644 index 000000000000..e81356362470 --- /dev/null +++ b/drivers/acpi/ec.h @@ -0,0 +1,100 @@ +/* + * Copyright (C) 2000 Andrew Grover + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU 
General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + + +#ifndef __EC_H__ +#define __EC_H__ + +// TODO: Linux-specific +#include +#include + +#include +#include + +/***************************************************************************** + * Types & Other Defines + *****************************************************************************/ + +#define EC_DEFAULT_TIMEOUT 1000 /* 1 second */ +#define EC_GPE_UNKNOWN 0xFFFFFFFF +#define EC_PORT_UNKNOWN 0x00000000 +#define EC_BURST_ENABLE_ACKNOWLEDGE 0x90 + +/* + * EC_COMMAND: + * ----------- + */ +typedef UINT8 EC_COMMAND; + +#define EC_COMMAND_UNKNOWN ((EC_COMMAND) 0x00) +#define EC_COMMAND_READ ((EC_COMMAND) 0x80) +#define EC_COMMAND_WRITE ((EC_COMMAND) 0x81) +#define EC_COMMAND_QUERY ((EC_COMMAND) 0x84) + +/* + * EC_STATUS: + * ---------- + * The encoding of the EC status register is illustrated below. + * Note that a set bit (1) indicates the property is TRUE + * (e.g. if bit 0 is set then the output buffer is full). + * +-+-+-+-+-+-+-+-+ + * |7|6|5|4|3|2|1|0| + * +-+-+-+-+-+-+-+-+ + * | | | | | | | | + * | | | | | | | +- Output Buffer Full (OBF)? + * | | | | | | +--- Input Buffer Full (IBF)? + * | | | | | +----- + * | | | | +------- data Register is command Byte? + * | | | +--------- Burst Mode Enabled? + * | | +----------- SCI event? + * | +------------- SMI event? 
+ * +--------------- + * + */ +typedef UINT8 EC_STATUS; + +#define EC_FLAG_OUTPUT_BUFFER ((EC_STATUS) 0x01) +#define EC_FLAG_INPUT_BUFFER ((EC_STATUS) 0x02) +#define EC_FLAG_BURST_MODE ((EC_STATUS) 0x10) +#define EC_FLAG_SCI ((EC_STATUS) 0x20) + +/* + * EC_EVENT: + * --------- + */ +typedef UINT8 EC_EVENT; + +#define EC_EVENT_UNKNOWN ((EC_EVENT) 0x00) +#define EC_EVENT_NONE ((EC_EVENT) 0x00) +#define EC_EVENT_OUTPUT_BUFFER_FULL ((EC_EVENT) 0x01) +#define EC_EVENT_INPUT_BUFFER_EMPTY ((EC_EVENT) 0x02) +#define EC_EVENT_SCI ((EC_EVENT) 0x03) + +/* + * EC_REQUEST: + * ----------- + */ +typedef struct +{ + EC_COMMAND command; + UINT8 address; + UINT8 data; +} EC_REQUEST; + +#endif /* __EC_H__ */ diff --git a/drivers/acpi/events/evevent.c b/drivers/acpi/events/evevent.c index 4ae76eb59933..d5ce143a8899 100644 --- a/drivers/acpi/events/evevent.c +++ b/drivers/acpi/events/evevent.c @@ -2,7 +2,7 @@ * * Module Name: evevent - Fixed and General Purpose Acpi_event * handling and dispatch - * $Revision: 30 $ + * $Revision: 32 $ * *****************************************************************************/ @@ -375,10 +375,10 @@ acpi_ev_gpe_initialize (void) for (i = 0; i < gpe0register_count; i++) { acpi_gbl_gpe_registers[register_index].status_addr = - (u16) (acpi_gbl_FADT->Xgpe0blk.address + i); + (u16) (ACPI_GET_ADDRESS (acpi_gbl_FADT->Xgpe0blk.address) + i); acpi_gbl_gpe_registers[register_index].enable_addr = - (u16) (acpi_gbl_FADT->Xgpe0blk.address + i + gpe0register_count); + (u16) (ACPI_GET_ADDRESS (acpi_gbl_FADT->Xgpe0blk.address) + i + gpe0register_count); acpi_gbl_gpe_registers[register_index].gpe_base = (u8) MUL_8 (i); @@ -402,10 +402,10 @@ acpi_ev_gpe_initialize (void) for (i = 0; i < gpe1_register_count; i++) { acpi_gbl_gpe_registers[register_index].status_addr = - (u16) (acpi_gbl_FADT->Xgpe1_blk.address + i); + (u16) (ACPI_GET_ADDRESS (acpi_gbl_FADT->Xgpe1_blk.address) + i); acpi_gbl_gpe_registers[register_index].enable_addr = - (u16) 
(acpi_gbl_FADT->Xgpe1_blk.address + i + gpe1_register_count); + (u16) (ACPI_GET_ADDRESS (acpi_gbl_FADT->Xgpe1_blk.address) + i + gpe1_register_count); acpi_gbl_gpe_registers[register_index].gpe_base = (u8) (acpi_gbl_FADT->gpe1_base + MUL_8 (i)); diff --git a/drivers/acpi/events/evregion.c b/drivers/acpi/events/evregion.c index a1781d7c3aa2..53cae63925aa 100644 --- a/drivers/acpi/events/evregion.c +++ b/drivers/acpi/events/evregion.c @@ -1,7 +1,7 @@ /****************************************************************************** * * Module Name: evregion - ACPI Address_space (Op_region) handler dispatch - * $Revision: 90 $ + * $Revision: 93 $ * *****************************************************************************/ @@ -292,8 +292,8 @@ acpi_ev_address_space_dispatch ( * * FUNCTION: Acpi_ev_disassociate_region_from_handler * - * PARAMETERS: Handler_obj - Handler Object - * Region_obj - Region Object + * PARAMETERS: Region_obj - Region Object + * Acpi_ns_is_locked - Namespace Region Already Locked? * * RETURN: None * @@ -304,7 +304,8 @@ acpi_ev_address_space_dispatch ( void acpi_ev_disassociate_region_from_handler( - ACPI_OPERAND_OBJECT *region_obj) + ACPI_OPERAND_OBJECT *region_obj, + u8 acpi_ns_is_locked) { ACPI_OPERAND_OBJECT *handler_obj; ACPI_OPERAND_OBJECT *obj_desc; @@ -347,11 +348,19 @@ acpi_ev_disassociate_region_from_handler( *last_obj_ptr = obj_desc->region.next; obj_desc->region.next = NULL; /* Must clear field */ + if (acpi_ns_is_locked) { + acpi_cm_release_mutex (ACPI_MTX_NAMESPACE); + } + /* * Now stop region accesses by executing the _REG method */ acpi_ev_execute_reg_method (region_obj, 0); + if (acpi_ns_is_locked) { + acpi_cm_acquire_mutex (ACPI_MTX_NAMESPACE); + } + /* * Call the setup handler with the deactivate notification */ @@ -404,6 +413,7 @@ acpi_ev_disassociate_region_from_handler( * * PARAMETERS: Handler_obj - Handler Object * Region_obj - Region Object + * Acpi_ns_is_locked - Namespace Region Already Locked? 
* * RETURN: None * @@ -589,7 +599,7 @@ acpi_ev_addr_handler_helper ( * * First disconnect region for any previous handler (if any) */ - acpi_ev_disassociate_region_from_handler (obj_desc); + acpi_ev_disassociate_region_from_handler (obj_desc, FALSE); /* * Then connect the region to the new handler diff --git a/drivers/acpi/events/evxfregn.c b/drivers/acpi/events/evxfregn.c index ade6f08b950b..71116cfc8715 100644 --- a/drivers/acpi/events/evxfregn.c +++ b/drivers/acpi/events/evxfregn.c @@ -2,7 +2,7 @@ * * Module Name: evxfregn - External Interfaces, ACPI Operation Regions and * Address Spaces. - * $Revision: 24 $ + * $Revision: 26 $ * *****************************************************************************/ @@ -337,7 +337,7 @@ acpi_remove_address_space_handler ( * The region is just inaccessible as indicated to * the _REG method */ - acpi_ev_disassociate_region_from_handler(region_obj); + acpi_ev_disassociate_region_from_handler(region_obj, FALSE); /* * Walk the list, since we took the first region and it diff --git a/drivers/acpi/hardware/hwacpi.c b/drivers/acpi/hardware/hwacpi.c index da6f8c1ac97e..d2154a1a2646 100644 --- a/drivers/acpi/hardware/hwacpi.c +++ b/drivers/acpi/hardware/hwacpi.c @@ -1,7 +1,7 @@ /****************************************************************************** * * Module Name: hwacpi - ACPI hardware functions - mode and timer - * $Revision: 32 $ + * $Revision: 34 $ * *****************************************************************************/ @@ -126,7 +126,9 @@ acpi_hw_initialize ( * block is not fixed, so the buffer must be allocated with malloc */ - if (acpi_gbl_FADT->Xgpe0blk.address && acpi_gbl_FADT->gpe0blk_len) { + if (ACPI_VALID_ADDRESS (acpi_gbl_FADT->Xgpe0blk.address) && + acpi_gbl_FADT->gpe0blk_len) + { /* GPE0 specified in FADT */ acpi_gbl_gpe0enable_register_save = @@ -147,7 +149,9 @@ acpi_hw_initialize ( acpi_gbl_gpe0enable_register_save = NULL; } - if (acpi_gbl_FADT->Xgpe1_blk.address && acpi_gbl_FADT->gpe1_blk_len) { 
+ if (ACPI_VALID_ADDRESS (acpi_gbl_FADT->Xgpe1_blk.address) && + acpi_gbl_FADT->gpe1_blk_len) + { /* GPE1 defined */ acpi_gbl_gpe1_enable_register_save = @@ -317,7 +321,7 @@ acpi_hw_pmt_ticks (void) { u32 ticks; - ticks = acpi_os_in32 ((ACPI_IO_ADDRESS) acpi_gbl_FADT->Xpm_tmr_blk.address); + ticks = acpi_os_in32 ((ACPI_IO_ADDRESS) ACPI_GET_ADDRESS (acpi_gbl_FADT->Xpm_tmr_blk.address)); return (ticks); } diff --git a/drivers/acpi/hardware/hwregs.c b/drivers/acpi/hardware/hwregs.c index 8d8d254755b8..77b6a1c8c444 100644 --- a/drivers/acpi/hardware/hwregs.c +++ b/drivers/acpi/hardware/hwregs.c @@ -3,7 +3,7 @@ * * Module Name: hwregs - Read/write access functions for the various ACPI * control and status registers. - * $Revision: 84 $ + * $Revision: 86 $ * ******************************************************************************/ @@ -90,8 +90,8 @@ acpi_hw_clear_acpi_status (void) acpi_hw_register_write (ACPI_MTX_DO_NOT_LOCK, PM1_STS, ALL_FIXED_STS_BITS); - if (acpi_gbl_FADT->Xpm1b_evt_blk.address) { - acpi_os_out16 ((ACPI_IO_ADDRESS) acpi_gbl_FADT->Xpm1b_evt_blk.address, + if (ACPI_VALID_ADDRESS (acpi_gbl_FADT->Xpm1b_evt_blk.address)) { + acpi_os_out16 ((ACPI_IO_ADDRESS) ACPI_GET_ADDRESS (acpi_gbl_FADT->Xpm1b_evt_blk.address), (u16) ALL_FIXED_STS_BITS); } @@ -101,7 +101,7 @@ acpi_hw_clear_acpi_status (void) gpe_length = (u16) DIV_2 (acpi_gbl_FADT->gpe0blk_len); for (index = 0; index < gpe_length; index++) { - acpi_os_out8 ((ACPI_IO_ADDRESS) (acpi_gbl_FADT->Xgpe0blk.address + index), + acpi_os_out8 ((ACPI_IO_ADDRESS) (ACPI_GET_ADDRESS (acpi_gbl_FADT->Xgpe0blk.address) + index), (u8) 0xff); } } @@ -110,7 +110,7 @@ acpi_hw_clear_acpi_status (void) gpe_length = (u16) DIV_2 (acpi_gbl_FADT->gpe1_blk_len); for (index = 0; index < gpe_length; index++) { - acpi_os_out8 ((ACPI_IO_ADDRESS) (acpi_gbl_FADT->Xgpe1_blk.address + index), + acpi_os_out8 ((ACPI_IO_ADDRESS) (ACPI_GET_ADDRESS (acpi_gbl_FADT->Xgpe1_blk.address) + index), (u8) 0xff); } } @@ -816,7 +816,7 @@ 
acpi_hw_low_level_read ( * a non-zero address within */ if ((!reg) || - (!reg->address)) + (!ACPI_VALID_ADDRESS (reg->address))) { return 0; } @@ -831,7 +831,7 @@ acpi_hw_low_level_read ( { case ADDRESS_SPACE_SYSTEM_MEMORY: - mem_address = (ACPI_PHYSICAL_ADDRESS) reg->address + offset; + mem_address = (ACPI_PHYSICAL_ADDRESS) (ACPI_GET_ADDRESS (reg->address) + offset); switch (width) { @@ -850,7 +850,7 @@ acpi_hw_low_level_read ( case ADDRESS_SPACE_SYSTEM_IO: - io_address = (ACPI_IO_ADDRESS) reg->address + offset; + io_address = (ACPI_IO_ADDRESS) (ACPI_GET_ADDRESS (reg->address) + offset); switch (width) { @@ -869,8 +869,8 @@ acpi_hw_low_level_read ( case ADDRESS_SPACE_PCI_CONFIG: - pci_dev_func = ACPI_PCI_DEVFUN (reg->address); - pci_register = ACPI_PCI_REGISTER (reg->address) + offset; + pci_dev_func = ACPI_PCI_DEVFUN (ACPI_GET_ADDRESS (reg->address)); + pci_register = ACPI_PCI_REGISTER (ACPI_GET_ADDRESS (reg->address)) + offset; switch (width) { @@ -925,7 +925,7 @@ acpi_hw_low_level_write ( * a non-zero address within */ if ((!reg) || - (!reg->address)) + (!ACPI_VALID_ADDRESS (reg->address))) { return; } @@ -940,7 +940,7 @@ acpi_hw_low_level_write ( { case ADDRESS_SPACE_SYSTEM_MEMORY: - mem_address = (ACPI_PHYSICAL_ADDRESS) reg->address + offset; + mem_address = (ACPI_PHYSICAL_ADDRESS) (ACPI_GET_ADDRESS (reg->address) + offset); switch (width) { @@ -959,7 +959,7 @@ acpi_hw_low_level_write ( case ADDRESS_SPACE_SYSTEM_IO: - io_address = (ACPI_IO_ADDRESS) reg->address + offset; + io_address = (ACPI_IO_ADDRESS) (ACPI_GET_ADDRESS (reg->address) + offset); switch (width) { @@ -978,8 +978,8 @@ acpi_hw_low_level_write ( case ADDRESS_SPACE_PCI_CONFIG: - pci_dev_func = ACPI_PCI_DEVFUN (reg->address); - pci_register = ACPI_PCI_REGISTER (reg->address) + offset; + pci_dev_func = ACPI_PCI_DEVFUN (ACPI_GET_ADDRESS (reg->address)); + pci_register = ACPI_PCI_REGISTER (ACPI_GET_ADDRESS (reg->address)) + offset; switch (width) { diff --git a/drivers/acpi/include/acevents.h 
b/drivers/acpi/include/acevents.h index 706862d24907..3e76370bf5e3 100644 --- a/drivers/acpi/include/acevents.h +++ b/drivers/acpi/include/acevents.h @@ -1,7 +1,7 @@ /****************************************************************************** * * Name: acevents.h - Event subcomponent prototypes and defines - * $Revision: 60 $ + * $Revision: 62 $ * *****************************************************************************/ @@ -123,7 +123,8 @@ acpi_ev_addr_handler_helper ( void acpi_ev_disassociate_region_from_handler( - ACPI_OPERAND_OBJECT *region_obj); + ACPI_OPERAND_OBJECT *region_obj, + u8 acpi_ns_is_locked); ACPI_STATUS diff --git a/drivers/acpi/include/aclinux.h b/drivers/acpi/include/aclinux.h index 40446b08c559..7b5407654829 100644 --- a/drivers/acpi/include/aclinux.h +++ b/drivers/acpi/include/aclinux.h @@ -1,7 +1,7 @@ /****************************************************************************** * * Name: aclinux.h - OS specific defines, etc. - * $Revision: 4 $ + * $Revision: 6 $ * *****************************************************************************/ @@ -48,20 +48,4 @@ #define ACPI_NO_INTEGER64_SUPPORT #endif -#if 0 - -/* Use native Linux string library */ - -#define ACPI_USE_SYSTEM_CLIBRARY - -/* Special functions */ - -#define strtoul simple_strtoul - -/* Linux clib doesn't to strupr, but we do. 
*/ -char * -strupr(char *str); - -#endif /* 0 */ - #endif /* __ACLINUX_H__ */ diff --git a/drivers/acpi/include/aclocal.h b/drivers/acpi/include/aclocal.h index 965d2ad117ee..a647026f1573 100644 --- a/drivers/acpi/include/aclocal.h +++ b/drivers/acpi/include/aclocal.h @@ -1,7 +1,7 @@ /****************************************************************************** * * Name: aclocal.h - Internal data types used across the ACPI subsystem - * $Revision: 93 $ + * $Revision: 95 $ * *****************************************************************************/ @@ -178,8 +178,11 @@ typedef struct acpi_node #define ANOBJ_AML_ATTACHMENT 0x01 #define ANOBJ_END_OF_PEER_LIST 0x02 #define ANOBJ_DATA_WIDTH_32 0x04 /* Parent table is 64-bits */ -#define ANOBJ_METHOD_ARG 0x40 -#define ANOBJ_METHOD_LOCAL 0x80 +#define ANOBJ_METHOD_ARG 0x08 +#define ANOBJ_METHOD_LOCAL 0x10 +#define ANOBJ_METHOD_NO_RETVAL 0x20 +#define ANOBJ_METHOD_SOME_NO_RETVAL 0x40 + /* * ACPI Table Descriptor. One per ACPI table diff --git a/drivers/acpi/include/acmacros.h b/drivers/acpi/include/acmacros.h index c1fcd376f9bf..19cfa05913d7 100644 --- a/drivers/acpi/include/acmacros.h +++ b/drivers/acpi/include/acmacros.h @@ -1,7 +1,7 @@ /****************************************************************************** * * Name: acmacros.h - C macros for the entire subsystem. - * $Revision: 56 $ + * $Revision: 59 $ * *****************************************************************************/ @@ -63,6 +63,15 @@ #define HI_LIMIT(b) ((u8) (((b) & 0x00FF0000) >> 16)) +#ifdef _IA16 +#define ACPI_GET_ADDRESS(a) ((a).lo) +#define ACPI_STORE_ADDRESS(a,b) {(a).hi=0;(a).lo=(b);} +#define ACPI_VALID_ADDRESS(a) ((a).hi && (a).lo) +#else +#define ACPI_GET_ADDRESS(a) (a) +#define ACPI_STORE_ADDRESS(a,b) ((a)=(b)) +#define ACPI_VALID_ADDRESS(a) (a) +#endif /* * Extract a byte of data using a pointer. 
Any more than a byte and we * get into potential aligment issues -- see the STORE macros below @@ -167,9 +176,16 @@ #define ACPI_PCI_FUNCTION(a) (u32) ((((a) & ACPI_PCI_FUNCTION_MASK) >> 16)) #define ACPI_PCI_DEVICE(a) (u32) ((((a) & ACPI_PCI_DEVICE_MASK) >> 32)) + +#ifndef _IA16 #define ACPI_PCI_REGISTER(a) (u32) (((a) & ACPI_PCI_REGISTER_MASK)) #define ACPI_PCI_DEVFUN(a) (u32) ((ACPI_PCI_DEVICE(a) << 16) | ACPI_PCI_FUNCTION(a)) +#else +#define ACPI_PCI_REGISTER(a) (u32) (((a) & 0x0000FFFF)) +#define ACPI_PCI_DEVFUN(a) (u32) ((((a) & 0xFFFF0000) >> 16)) + +#endif /* * An ACPI_HANDLE (which is actually an ACPI_NAMESPACE_NODE *) can appear in some contexts, diff --git a/drivers/acpi/include/acnamesp.h b/drivers/acpi/include/acnamesp.h index 55006650bfca..e010a811881d 100644 --- a/drivers/acpi/include/acnamesp.h +++ b/drivers/acpi/include/acnamesp.h @@ -1,7 +1,7 @@ /****************************************************************************** * * Name: acnamesp.h - Namespace subcomponent prototypes and defines - * $Revision: 98 $ + * $Revision: 100 $ * *****************************************************************************/ @@ -56,6 +56,7 @@ #define NS_SEARCH_PARENT 0x01 #define NS_DONT_OPEN_SCOPE 0x02 #define NS_NO_PEER_SEARCH 0x04 +#define NS_ERROR_IF_FOUND 0x08 #define NS_WALK_UNLOCK TRUE #define NS_WALK_NO_UNLOCK FALSE diff --git a/drivers/acpi/include/acpiosxf.h b/drivers/acpi/include/acpiosxf.h index 47b1f28e5221..2f9eb4c13097 100644 --- a/drivers/acpi/include/acpiosxf.h +++ b/drivers/acpi/include/acpiosxf.h @@ -1,9 +1,9 @@ /****************************************************************************** * - * Name: acpiosd.h - All interfaces to the OS-dependent layer. These - * interfaces must be implemented by the OS-dependent - * front-end to the ACPI subsystem. + * Name: acpiosxf.h - All interfaces to the OS-dependent layer. These + * interfaces must be implemented by the OS-dependent + * front-end to the ACPI subsystem. 
* *****************************************************************************/ @@ -35,10 +35,10 @@ /* Priorities for Acpi_os_queue_for_execution */ -#define OSD_PRIORITY_HIGH 1 -#define OSD_PRIORITY_MED 2 -#define OSD_PRIORITY_LO 3 -#define OSD_PRIORITY_GPE OSD_PRIORITY_HIGH +#define OSD_PRIORITY_GPE 1 +#define OSD_PRIORITY_HIGH 2 +#define OSD_PRIORITY_MED 3 +#define OSD_PRIORITY_LO 4 #define ACPI_NO_UNIT_LIMIT ((u32) -1) #define ACPI_MUTEX_SEM 1 diff --git a/drivers/acpi/include/acpixf.h b/drivers/acpi/include/acpixf.h index 96b013b61612..d70fa75a78b7 100644 --- a/drivers/acpi/include/acpixf.h +++ b/drivers/acpi/include/acpixf.h @@ -1,7 +1,7 @@ /****************************************************************************** * - * Name: acxface.h - External interfaces to the ACPI subsystem + * Name: acpixf.h - External interfaces to the ACPI subsystem * *****************************************************************************/ diff --git a/drivers/acpi/include/actypes.h b/drivers/acpi/include/actypes.h index a29d5c24dfd9..dfa28a9d9c6b 100644 --- a/drivers/acpi/include/actypes.h +++ b/drivers/acpi/include/actypes.h @@ -1,7 +1,7 @@ /****************************************************************************** * * Name: actypes.h - Common data types for the entire ACPI subsystem - * $Revision: 155 $ + * $Revision: 159 $ * *****************************************************************************/ @@ -85,12 +85,19 @@ typedef long INT32; typedef int INT16; typedef unsigned long UINT32; +typedef struct +{ + UINT32 Lo; + UINT32 Hi; + +} UINT64; + typedef UINT16 NATIVE_UINT; typedef INT16 NATIVE_INT; typedef UINT32 ACPI_TBLPTR; typedef UINT32 ACPI_IO_ADDRESS; -typedef UINT32 ACPI_PHYSICAL_ADDRESS; +typedef void *ACPI_PHYSICAL_ADDRESS; #define ALIGNED_ADDRESS_BOUNDARY 0x00000002 #define _HW_ALIGNMENT_SUPPORT @@ -197,19 +204,23 @@ typedef void* ACPI_HANDLE; /* Actually a ptr to an */ #ifdef ACPI_NO_INTEGER64_SUPPORT -/* 32-bit Integers */ +/* 32-bit integers only, 
no 64-bit support */ typedef u32 ACPI_INTEGER; -#define ACPI_INTEGER_MAX ACPI_UINT32_MAX; +#define ACPI_INTEGER_MAX ACPI_UINT32_MAX #define ACPI_INTEGER_BIT_SIZE 32 +#define ACPI_MAX_BCD_VALUE 99999999 +#define ACPI_MAX_BCD_DIGITS 8 #else -/* 64-bit Integers */ +/* 64-bit integers */ typedef UINT64 ACPI_INTEGER; -#define ACPI_INTEGER_MAX ACPI_UINT64_MAX; +#define ACPI_INTEGER_MAX ACPI_UINT64_MAX #define ACPI_INTEGER_BIT_SIZE 64 +#define ACPI_MAX_BCD_VALUE 9999999999999999 +#define ACPI_MAX_BCD_DIGITS 16 #endif @@ -676,7 +687,7 @@ typedef struct typedef struct { - UINT64 mapped_physical_address; + ACPI_PHYSICAL_ADDRESS mapped_physical_address; u8 *mapped_logical_address; u32 mapped_length; } MEM_HANDLER_CONTEXT; diff --git a/drivers/acpi/interpreter/ammonad.c b/drivers/acpi/interpreter/ammonad.c index ac721583e115..df9671c0674e 100644 --- a/drivers/acpi/interpreter/ammonad.c +++ b/drivers/acpi/interpreter/ammonad.c @@ -2,7 +2,7 @@ /****************************************************************************** * * Module Name: ammonad - ACPI AML (p-code) execution for monadic operators - * $Revision: 85 $ + * $Revision: 88 $ * *****************************************************************************/ @@ -239,10 +239,9 @@ acpi_aml_exec_monadic2_r ( ACPI_OPERAND_OBJECT *ret_desc2 = NULL; u32 res_val; ACPI_STATUS status; - u32 d0; - u32 d1; - u32 d2; - u32 d3; + u32 i; + u32 j; + ACPI_INTEGER digit; /* Resolve all operands */ @@ -330,19 +329,32 @@ acpi_aml_exec_monadic2_r ( case AML_FROM_BCD_OP: - /* TBD: for ACPI 2.0, expand to 64 bits */ + /* + * The 64-bit ACPI integer can hold 16 4-bit BCD integers + */ + ret_desc->number.value = 0; + for (i = 0; i < ACPI_MAX_BCD_DIGITS; i++) { + /* Get one BCD digit */ - d0 = (u32) (obj_desc->number.value & 15); - d1 = (u32) (obj_desc->number.value >> 4 & 15); - d2 = (u32) (obj_desc->number.value >> 8 & 15); - d3 = (u32) (obj_desc->number.value >> 12 & 15); + digit = (ACPI_INTEGER) ((obj_desc->number.value >> (i * 4)) & 0xF); - 
if (d0 > 9 || d1 > 9 || d2 > 9 || d3 > 9) { - status = AE_AML_NUMERIC_OVERFLOW; - goto cleanup; - } + /* Check the range of the digit */ + + if (digit > 9) { + status = AE_AML_NUMERIC_OVERFLOW; + goto cleanup; + } + + if (digit > 0) { + /* Sum into the result with the appropriate power of 10 */ - ret_desc->number.value = d0 + d1 * 10 + d2 * 100 + d3 * 1000; + for (j = 0; j < i; j++) { + digit *= 10; + } + + ret_desc->number.value += digit; + } + } break; @@ -350,19 +362,27 @@ acpi_aml_exec_monadic2_r ( case AML_TO_BCD_OP: - /* TBD: for ACPI 2.0, expand to 64 bits */ - if (obj_desc->number.value > 9999) { + if (obj_desc->number.value > ACPI_MAX_BCD_VALUE) { status = AE_AML_NUMERIC_OVERFLOW; goto cleanup; } - ret_desc->number.value - = ACPI_MODULO (obj_desc->number.value, 10) - + (ACPI_MODULO (ACPI_DIVIDE (obj_desc->number.value, 10), 10) << 4) - + (ACPI_MODULO (ACPI_DIVIDE (obj_desc->number.value, 100), 10) << 8) - + (ACPI_MODULO (ACPI_DIVIDE (obj_desc->number.value, 1000), 10) << 12); + ret_desc->number.value = 0; + for (i = 0; i < ACPI_MAX_BCD_DIGITS; i++) { + /* Divide by nth factor of 10 */ + digit = obj_desc->number.value; + for (j = 0; j < i; j++) { + digit /= 10; + } + + /* Create the BCD digit */ + + if (digit > 0) { + ret_desc->number.value += (ACPI_MODULO (digit, 10) << (i * 4)); + } + } break; @@ -404,7 +424,7 @@ acpi_aml_exec_monadic2_r ( /* The object exists in the namespace, return TRUE */ - ret_desc->number.value = ACPI_INTEGER_MAX + ret_desc->number.value = ACPI_INTEGER_MAX; goto cleanup; break; diff --git a/drivers/acpi/interpreter/amprep.c b/drivers/acpi/interpreter/amprep.c index a4aa6b834217..266cb0105e68 100644 --- a/drivers/acpi/interpreter/amprep.c +++ b/drivers/acpi/interpreter/amprep.c @@ -2,7 +2,7 @@ /****************************************************************************** * * Module Name: amprep - ACPI AML (p-code) execution - field prep utilities - * $Revision: 69 $ + * $Revision: 72 $ * 
*****************************************************************************/ @@ -50,13 +50,25 @@ static u32 acpi_aml_decode_field_access_type ( - u32 access) + u32 access, + u16 length) { switch (access) { case ACCESS_ANY_ACC: - return (8); + if (length <= 8) { + return (8); + } + else if (length <= 16) { + return (16); + } + else if (length <= 32) { + return (32); + } + else { + return (8); + } break; case ACCESS_BYTE_ACC: @@ -131,7 +143,7 @@ acpi_aml_prep_common_field_object ( /* Decode the access type so we can compute offsets */ - granularity = acpi_aml_decode_field_access_type (obj_desc->field.access); + granularity = acpi_aml_decode_field_access_type (obj_desc->field.access, obj_desc->field.length); if (!granularity) { return (AE_AML_OPERAND_VALUE); } diff --git a/drivers/acpi/interpreter/amutils.c b/drivers/acpi/interpreter/amutils.c index 2c5e803fb74f..4e1359888d38 100644 --- a/drivers/acpi/interpreter/amutils.c +++ b/drivers/acpi/interpreter/amutils.c @@ -2,7 +2,7 @@ /****************************************************************************** * * Module Name: amutils - interpreter/scanner utilities - * $Revision: 64 $ + * $Revision: 66 $ * *****************************************************************************/ @@ -167,7 +167,7 @@ acpi_aml_truncate_for32bit_table ( * We are running a method that exists in a 32-bit ACPI table. 
* Truncate the value to 32 bits by zeroing out the upper 32-bit field */ - obj_desc->number.value &= (UINT64) ACPI_UINT32_MAX; + obj_desc->number.value &= (ACPI_INTEGER) ACPI_UINT32_MAX; } } diff --git a/drivers/acpi/namespace/nsaccess.c b/drivers/acpi/namespace/nsaccess.c index 4b420edff12c..27c02e22af0f 100644 --- a/drivers/acpi/namespace/nsaccess.c +++ b/drivers/acpi/namespace/nsaccess.c @@ -1,7 +1,7 @@ /******************************************************************************* * * Module Name: nsaccess - Top-level functions for accessing ACPI namespace - * $Revision: 115 $ + * $Revision: 117 $ * ******************************************************************************/ @@ -209,14 +209,14 @@ unlock_and_exit: * * FUNCTION: Acpi_ns_lookup * - * PARAMETERS: Prefix_node - Search scope if name is not fully qualified + * PARAMETERS: Prefix_node - Search scope if name is not fully qualified * Pathname - Search pathname, in internal format * (as represented in the AML stream) * Type - Type associated with name * Interpreter_mode - IMODE_LOAD_PASS2 => add name if not found * Flags - Flags describing the search restrictions * Walk_state - Current state of the walk - * Return_node - Where the Node is placed (if found + * Return_node - Where the Node is placed (if found * or created successfully) * * RETURN: Status diff --git a/drivers/acpi/namespace/nssearch.c b/drivers/acpi/namespace/nssearch.c index 78cb405854bb..001f57d9d153 100644 --- a/drivers/acpi/namespace/nssearch.c +++ b/drivers/acpi/namespace/nssearch.c @@ -1,7 +1,7 @@ /******************************************************************************* * * Module Name: nssearch - Namespace search - * $Revision: 60 $ + * $Revision: 62 $ * ******************************************************************************/ @@ -289,6 +289,16 @@ acpi_ns_search_and_enter ( status = acpi_ns_search_node (target_name, node, type, return_node); if (status != AE_NOT_FOUND) { + /* + * If we found it AND the request 
specifies that a + * find is an error, return the error + */ + if ((status == AE_OK) && + (flags & NS_ERROR_IF_FOUND)) + { + status = AE_EXIST; + } + /* * Either found it or there was an error * -- finished either way diff --git a/drivers/acpi/parser/psargs.c b/drivers/acpi/parser/psargs.c index 9115ac0363bd..35d623668367 100644 --- a/drivers/acpi/parser/psargs.c +++ b/drivers/acpi/parser/psargs.c @@ -1,7 +1,7 @@ /****************************************************************************** * * Module Name: psargs - Parse AML opcode arguments - * $Revision: 40 $ + * $Revision: 42 $ * *****************************************************************************/ @@ -401,7 +401,7 @@ acpi_ps_get_next_namepath ( /* * Either we didn't find the object in the namespace, or the object is * something other than a control method. Just initialize the Op with the - * pathname + * pathname. */ acpi_ps_init_op (arg, AML_NAMEPATH_OP); diff --git a/drivers/acpi/parser/psparse.c b/drivers/acpi/parser/psparse.c index 6066b27e3537..7471efdcbaa7 100644 --- a/drivers/acpi/parser/psparse.c +++ b/drivers/acpi/parser/psparse.c @@ -1,7 +1,7 @@ /****************************************************************************** * * Module Name: psparse - Parser top level AML parse routines - * $Revision: 69 $ + * $Revision: 71 $ * *****************************************************************************/ @@ -1044,7 +1044,6 @@ acpi_ps_parse_aml ( ACPI_WALK_LIST *prev_walk_list = acpi_gbl_current_walk_list; ACPI_OPERAND_OBJECT *return_desc; ACPI_OPERAND_OBJECT *mth_desc = NULL; - ACPI_NAMESPACE_NODE *start_node; /* Create and initialize a new parser state */ @@ -1082,19 +1081,16 @@ acpi_ps_parse_aml ( if (method_node) { - start_node = method_node; parser_state->start_node = method_node; walk_state->walk_type = WALK_METHOD; - if (start_node) { - /* Push start scope on scope stack and make it current */ - - status = acpi_ds_scope_stack_push (start_node, ACPI_TYPE_METHOD, walk_state); - if 
(ACPI_FAILURE (status)) { - return (status); - } + /* Push start scope on scope stack and make it current */ + status = acpi_ds_scope_stack_push (method_node, ACPI_TYPE_METHOD, walk_state); + if (ACPI_FAILURE (status)) { + return (status); } + /* Init arguments if this is a control method */ /* TBD: [Restructure] add walkstate as a param */ @@ -1105,6 +1101,8 @@ acpi_ps_parse_aml ( /* Setup the current scope */ node = parser_state->start_op->node; + parser_state->start_node = node; + if (node) { /* Push start scope on scope stack and make it current */ diff --git a/drivers/acpi/power.c b/drivers/acpi/power.c new file mode 100644 index 000000000000..0422bf94dfd9 --- /dev/null +++ b/drivers/acpi/power.c @@ -0,0 +1,137 @@ +/* + * power.c - Overall power driver. Also handles AC adapter device. + * + * Copyright (C) 2000 Andrew Grover + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#include +#include +#include +#include "acpi.h" +#include "driver.h" + +#define _COMPONENT OS_DEPENDENT + MODULE_NAME ("power") + +int acpi_cmbatt_init(void); +int acpi_cmbatt_terminate(void); + +/* ACPI-specific defines */ +#define ACPI_AC_ADAPTER_HID "ACPI0003" + +static int ac_count = 0; +static ACPI_HANDLE ac_handle = 0; + +/* + * We found a device with the correct HID + */ +static ACPI_STATUS +acpi_found_ac_adapter(ACPI_HANDLE handle, u32 level, void *ctx, void **value) +{ + ACPI_DEVICE_INFO info; + + if (ac_count > 0) { + printk(KERN_ERR "AC Adapter: more than one!\n"); + return (AE_OK); + } + + if (!ACPI_SUCCESS(acpi_get_object_info(handle, &info))) { + printk(KERN_ERR "AC Adapter: Could not get AC Adapter object info\n"); + return (AE_OK); + } + + if (!(info.valid & ACPI_VALID_STA)) { + printk(KERN_ERR "AC Adapter: Battery _STA invalid\n"); + return AE_OK; + } + + printk(KERN_INFO "AC Adapter: found\n"); + + ac_handle = handle; + + ac_count++; + + return AE_OK; +} + +static int +proc_read_ac_adapter_status(char *page, char **start, off_t off, + int count, int *eof, void *data) +{ + ACPI_OBJECT obj; + ACPI_BUFFER buf; + + char *p = page; + int len; + + buf.length = sizeof(obj); + buf.pointer = &obj; + if (!ACPI_SUCCESS(acpi_evaluate_object(ac_handle, "_PSR", NULL, &buf)) + || obj.type != ACPI_TYPE_NUMBER) { + p += sprintf(p, "Could not read AC status\n"); + goto end; + } + + if (obj.number.value) + p += sprintf(p, "on-line\n"); + else + p += sprintf(p, "off-line\n"); + +end: + len = (p - page); + if (len <= off+count) *eof = 1; + *start = page + off; + len -= off; + if (len>count) len = count; + if (len<0) len = 0; + return len; +} + +int +acpi_power_init(void) +{ + acpi_get_devices(ACPI_AC_ADAPTER_HID, + acpi_found_ac_adapter, + NULL, + 
NULL); + + if (!proc_mkdir("power", NULL)) + return 0; + + if (ac_handle) { + create_proc_read_entry("power/ac", 0, NULL, + proc_read_ac_adapter_status, NULL); + } + + acpi_cmbatt_init(); + + return 0; +} + +int +acpi_power_terminate(void) +{ + acpi_cmbatt_terminate(); + + if (ac_handle) { + remove_proc_entry("power/ac", NULL); + } + + remove_proc_entry("power", NULL); + + return 0; +} diff --git a/drivers/acpi/sys.c b/drivers/acpi/sys.c index a920e399e451..13648c255023 100644 --- a/drivers/acpi/sys.c +++ b/drivers/acpi/sys.c @@ -129,6 +129,8 @@ int acpi_enter_sx(acpi_sstate_t state) { struct acpi_enter_sx_ctx ctx; + DECLARE_WAITQUEUE(wait, current); + int ret = 0; if ((STRNCMP(acpi_fadt.header.signature, ACPI_FADT_SIGNATURE, ACPI_SIG_LEN) != 0) || acpi_slptyp[state] == ACPI_INVALID) @@ -137,14 +139,22 @@ acpi_enter_sx(acpi_sstate_t state) init_waitqueue_head(&ctx.wait); ctx.state = state; + set_current_state(TASK_INTERRUPTIBLE); + add_wait_queue(&ctx.wait, &wait); + if (acpi_os_queue_for_execution(0, acpi_enter_sx_async, &ctx)) - return -1; + ret = -1; - interruptible_sleep_on(&ctx.wait); - if (signal_pending(current)) - return -ERESTARTSYS; + if (!ret) + schedule(); - return 0; + set_current_state(TASK_RUNNING); + remove_wait_queue(&ctx.wait, &wait); + + if (!ret && signal_pending(current)) + ret = -ERESTARTSYS; + + return ret; } int diff --git a/drivers/block/loop.c b/drivers/block/loop.c index dc5ecd1519f7..6ca603d8f2a5 100644 --- a/drivers/block/loop.c +++ b/drivers/block/loop.c @@ -453,7 +453,7 @@ static int loop_set_fd(struct loop_device *lo, kdev_t dev, unsigned int arg) structure that the caller is using */ lo->lo_device = inode->i_dev; - lo->lo_flags = LO_FLAGS_DO_BMAP; + lo->lo_flags |= LO_FLAGS_DO_BMAP; error = -ENFILE; lo->lo_backing_file = get_empty_filp(); diff --git a/drivers/char/Config.in b/drivers/char/Config.in index e2e57cc90763..c62f1a38e660 100644 --- a/drivers/char/Config.in +++ b/drivers/char/Config.in @@ -178,7 +178,7 @@ endmenu tristate 
'/dev/agpgart (AGP Support)' CONFIG_AGP $CONFIG_DRM_AGP if [ "$CONFIG_AGP" != "n" ]; then - bool ' Intel 440LX/BX/GX and I815/I840 support' CONFIG_AGP_INTEL + bool ' Intel 440LX/BX/GX and I815/I840/I850 support' CONFIG_AGP_INTEL bool ' Intel I810/I815 (on-board) support' CONFIG_AGP_I810 bool ' VIA chipset support' CONFIG_AGP_VIA bool ' AMD Irongate support' CONFIG_AGP_AMD diff --git a/drivers/char/Makefile b/drivers/char/Makefile index de73c312dcd5..77ce381455b6 100644 --- a/drivers/char/Makefile +++ b/drivers/char/Makefile @@ -25,6 +25,8 @@ export-objs := busmouse.o console.o keyboard.o sysrq.o \ misc.o pty.o random.o selection.o serial.o \ tty_io.o +mod-subdirs := joystick ftape drm pcmcia + list-multi := KEYMAP =defkeymap.o diff --git a/drivers/char/agp/agp.h b/drivers/char/agp/agp.h index a054af74583b..33743376a92e 100644 --- a/drivers/char/agp/agp.h +++ b/drivers/char/agp/agp.h @@ -164,6 +164,9 @@ struct agp_bridge_data { #ifndef PCI_DEVICE_ID_INTEL_840_0 #define PCI_DEVICE_ID_INTEL_840_0 0x1a21 #endif +#ifndef PCI_DEVICE_ID_INTEL_850_0 +#define PCI_DEVICE_ID_INTEL_850_0 0x2530 +#endif #ifndef PCI_DEVICE_ID_INTEL_810_DC100_0 #define PCI_DEVICE_ID_INTEL_810_DC100_0 0x7122 #endif @@ -213,6 +216,10 @@ struct agp_bridge_data { #define INTEL_I840_MCHCFG 0x50 #define INTEL_I840_ERRSTS 0xc8 +/* intel i850 registers */ +#define INTEL_I850_MCHCFG 0x50 +#define INTEL_I850_ERRSTS 0xc8 + /* intel i810 registers */ #define I810_GMADDR 0x10 #define I810_MMADDR 0x14 diff --git a/drivers/char/agp/agpgart_be.c b/drivers/char/agp/agpgart_be.c index 5d2b3de619a2..942142832282 100644 --- a/drivers/char/agp/agpgart_be.c +++ b/drivers/char/agp/agpgart_be.c @@ -415,8 +415,9 @@ int agp_unbind_memory(agp_memory * curr) /* * Driver routines - start * Currently this module supports the following chipsets: - * i810, 440lx, 440bx, 440gx, via vp3, via mvp3, via kx133, via kt133, - * amd irongate, ALi M1541, and generic support for the SiS chipsets. 
+ * i810, 440lx, 440bx, 440gx, i840, i850, via vp3, via mvp3, via kx133, + * via kt133, amd irongate, ALi M1541, and generic support for the SiS + * chipsets. */ /* Generic Agp routines - Start */ @@ -1191,6 +1192,38 @@ static int intel_840_configure(void) return 0; } +static int intel_850_configure(void) +{ + u32 temp; + u16 temp2; + aper_size_info_16 *current_size; + + current_size = A_SIZE_16(agp_bridge.current_size); + + /* aperture size */ + pci_write_config_byte(agp_bridge.dev, INTEL_APSIZE, + (char)current_size->size_value); + + /* address to map to */ + pci_read_config_dword(agp_bridge.dev, INTEL_APBASE, &temp); + agp_bridge.gart_bus_addr = (temp & PCI_BASE_ADDRESS_MEM_MASK); + + /* attbase - aperture base */ + pci_write_config_dword(agp_bridge.dev, INTEL_ATTBASE, + agp_bridge.gatt_bus_addr); + + /* agpctrl */ + pci_write_config_dword(agp_bridge.dev, INTEL_AGPCTRL, 0x0000); + + /* mcgcfg */ + pci_read_config_word(agp_bridge.dev, INTEL_I850_MCHCFG, &temp2); + pci_write_config_word(agp_bridge.dev, INTEL_I850_MCHCFG, + temp2 | (1 << 9)); + /* clear any possible AGP-related error conditions */ + pci_write_config_word(agp_bridge.dev, INTEL_I850_ERRSTS, 0x001c); + return 0; +} + static unsigned long intel_mask_memory(unsigned long addr, int type) { /* Memory type is ignored */ @@ -1272,6 +1305,34 @@ static int __init intel_840_setup (struct pci_dev *pdev) (void) pdev; /* unused */ } +static int __init intel_850_setup (struct pci_dev *pdev) +{ + agp_bridge.masks = intel_generic_masks; + agp_bridge.num_of_masks = 1; + agp_bridge.aperture_sizes = (void *) intel_generic_sizes; + agp_bridge.size_type = U16_APER_SIZE; + agp_bridge.num_aperture_sizes = 7; + agp_bridge.dev_private_data = NULL; + agp_bridge.needs_scratch_page = FALSE; + agp_bridge.configure = intel_850_configure; + agp_bridge.fetch_size = intel_fetch_size; + agp_bridge.cleanup = intel_cleanup; + agp_bridge.tlb_flush = intel_tlbflush; + agp_bridge.mask_memory = intel_mask_memory; + agp_bridge.agp_enable = 
agp_generic_agp_enable; + agp_bridge.cache_flush = global_cache_flush; + agp_bridge.create_gatt_table = agp_generic_create_gatt_table; + agp_bridge.free_gatt_table = agp_generic_free_gatt_table; + agp_bridge.insert_memory = agp_generic_insert_memory; + agp_bridge.remove_memory = agp_generic_remove_memory; + agp_bridge.alloc_by_type = agp_generic_alloc_by_type; + agp_bridge.free_by_type = agp_generic_free_by_type; + + return 0; + + (void) pdev; /* unused */ +} + #endif /* CONFIG_AGP_INTEL */ #ifdef CONFIG_AGP_VIA @@ -2070,6 +2131,12 @@ static struct { "Intel", "i840", intel_840_setup }, + { PCI_DEVICE_ID_INTEL_850_0, + PCI_VENDOR_ID_INTEL, + INTEL_I850, + "Intel", + "i850", + intel_850_setup }, { 0, PCI_VENDOR_ID_INTEL, INTEL_GENERIC, diff --git a/drivers/md/lvm-snap.c b/drivers/md/lvm-snap.c index 04007c1be586..980694ee32df 100644 --- a/drivers/md/lvm-snap.c +++ b/drivers/md/lvm-snap.c @@ -2,13 +2,14 @@ * kernel/lvm-snap.c * * Copyright (C) 2000 Andrea Arcangeli SuSE + * Heinz Mauelshagen, Sistina Software (persistent snapshots) * * LVM snapshot driver is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2, or (at your option) * any later version. * - * LVM driver is distributed in the hope that it will be useful, + * LVM snapshot driver is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. 
@@ -29,13 +30,27 @@ #include -static char *lvm_snap_version __attribute__ ((unused)) = "LVM 0.8final (15/02/2000)\n"; +static char *lvm_snap_version __attribute__ ((unused)) = "LVM 0.9 snapshot code (13/11/2000)\n"; extern const char *const lvm_name; extern int lvm_blocksizes[]; void lvm_snapshot_release(lv_t *); +uint lvm_pv_get_number(vg_t * vg, kdev_t rdev) +{ + uint p; + + for ( p = 0; p < vg->pv_max; p++) + { + if ( vg->pv[p] == NULL) continue; + if ( vg->pv[p]->pv_dev == rdev) break; + } + + return vg->pv[p]->pv_number; +} + + #define hashfn(dev,block,mask,chunk_size) \ ((HASHDEV(dev)^((block)/(chunk_size))) & (mask)) @@ -72,9 +87,9 @@ lvm_find_exception_table(kdev_t org_dev, unsigned long org_start, lv_t * lv) return ret; } -static inline void lvm_hash_link(lv_block_exception_t * exception, - kdev_t org_dev, unsigned long org_start, - lv_t * lv) +inline void lvm_hash_link(lv_block_exception_t * exception, + kdev_t org_dev, unsigned long org_start, + lv_t * lv) { struct list_head * hash_table = lv->lv_snapshot_hash_table; unsigned long mask = lv->lv_snapshot_hash_mask; @@ -97,7 +112,6 @@ int lvm_snapshot_remap_block(kdev_t * org_dev, unsigned long * org_sector, pe_adjustment = (*org_sector-pe_off) % chunk_size; __org_start = *org_sector - pe_adjustment; __org_dev = *org_dev; - ret = 0; exception = lvm_find_exception_table(__org_dev, __org_start, lv); if (exception) @@ -109,7 +123,7 @@ int lvm_snapshot_remap_block(kdev_t * org_dev, unsigned long * org_sector, return ret; } -static void lvm_drop_snapshot(lv_t * lv_snap, const char * reason) +void lvm_drop_snapshot(lv_t * lv_snap, const char * reason) { kdev_t last_dev; int i; @@ -118,8 +132,7 @@ static void lvm_drop_snapshot(lv_t * lv_snap, const char * reason) or error on this snapshot --> release it */ invalidate_buffers(lv_snap->lv_dev); - last_dev = 0; - for (i = 0; i < lv_snap->lv_remap_ptr; i++) { + for (i = last_dev = 0; i < lv_snap->lv_remap_ptr; i++) { if ( lv_snap->lv_block_exception[i].rdev_new != 
last_dev) { last_dev = lv_snap->lv_block_exception[i].rdev_new; invalidate_buffers(last_dev); @@ -149,7 +162,7 @@ static inline void lvm_snapshot_prepare_blocks(unsigned long * blocks, blocks[i] = start++; } -static inline int get_blksize(kdev_t dev) +inline int lvm_get_blksize(kdev_t dev) { int correct_size = BLOCK_SIZE, i, major; @@ -185,6 +198,133 @@ static inline void invalidate_snap_cache(unsigned long start, unsigned long nr, } #endif + +void lvm_snapshot_fill_COW_page(vg_t * vg, lv_t * lv_snap) +{ + int id = 0, is = lv_snap->lv_remap_ptr; + ulong blksize_snap; + lv_COW_table_disk_t * lv_COW_table = + ( lv_COW_table_disk_t *) page_address(lv_snap->lv_COW_table_page); + + if (is == 0) return; + is--; + blksize_snap = lvm_get_blksize(lv_snap->lv_block_exception[is].rdev_new); + is -= is % (blksize_snap / sizeof(lv_COW_table_disk_t)); + + memset(lv_COW_table, 0, blksize_snap); + for ( ; is < lv_snap->lv_remap_ptr; is++, id++) { + /* store new COW_table entry */ + lv_COW_table[id].pv_org_number = LVM_TO_DISK64(lvm_pv_get_number(vg, lv_snap->lv_block_exception[is].rdev_org)); + lv_COW_table[id].pv_org_rsector = LVM_TO_DISK64(lv_snap->lv_block_exception[is].rsector_org); + lv_COW_table[id].pv_snap_number = LVM_TO_DISK64(lvm_pv_get_number(vg, lv_snap->lv_block_exception[is].rdev_new)); + lv_COW_table[id].pv_snap_rsector = LVM_TO_DISK64(lv_snap->lv_block_exception[is].rsector_new); + } +} + + +/* + * writes a COW exception table sector to disk (HM) + * + */ + +int lvm_write_COW_table_block(vg_t * vg, + lv_t * lv_snap) +{ + int blksize_snap; + int end_of_table; + int idx = lv_snap->lv_remap_ptr, idx_COW_table; + int nr_pages_tmp; + int length_tmp; + ulong snap_pe_start, COW_table_sector_offset, + COW_entries_per_pe, COW_chunks_per_pe, COW_entries_per_block; + ulong blocks[1]; + const char * reason; + kdev_t snap_phys_dev; + struct kiobuf * iobuf = lv_snap->lv_iobuf; + struct page * page_tmp; + lv_COW_table_disk_t * lv_COW_table = + ( lv_COW_table_disk_t *) 
page_address(lv_snap->lv_COW_table_page); + + idx--; + + COW_chunks_per_pe = LVM_GET_COW_TABLE_CHUNKS_PER_PE(vg, lv_snap); + COW_entries_per_pe = LVM_GET_COW_TABLE_ENTRIES_PER_PE(vg, lv_snap); + + /* get physical addresse of destination chunk */ + snap_phys_dev = lv_snap->lv_block_exception[idx].rdev_new; + snap_pe_start = lv_snap->lv_block_exception[idx - (idx % COW_entries_per_pe)].rsector_new - lv_snap->lv_chunk_size; + + blksize_snap = lvm_get_blksize(snap_phys_dev); + + COW_entries_per_block = blksize_snap / sizeof(lv_COW_table_disk_t); + idx_COW_table = idx % COW_entries_per_pe % COW_entries_per_block; + + if ( idx_COW_table == 0) memset(lv_COW_table, 0, blksize_snap); + + /* sector offset into the on disk COW table */ + COW_table_sector_offset = (idx % COW_entries_per_pe) / (SECTOR_SIZE / sizeof(lv_COW_table_disk_t)); + + /* COW table block to write next */ + blocks[0] = (snap_pe_start + COW_table_sector_offset) >> (blksize_snap >> 10); + + /* store new COW_table entry */ + lv_COW_table[idx_COW_table].pv_org_number = LVM_TO_DISK64(lvm_pv_get_number(vg, lv_snap->lv_block_exception[idx].rdev_org)); + lv_COW_table[idx_COW_table].pv_org_rsector = LVM_TO_DISK64(lv_snap->lv_block_exception[idx].rsector_org); + lv_COW_table[idx_COW_table].pv_snap_number = LVM_TO_DISK64(lvm_pv_get_number(vg, snap_phys_dev)); + lv_COW_table[idx_COW_table].pv_snap_rsector = LVM_TO_DISK64(lv_snap->lv_block_exception[idx].rsector_new); + + length_tmp = iobuf->length; + iobuf->length = blksize_snap; + page_tmp = iobuf->maplist[0]; + iobuf->maplist[0] = lv_snap->lv_COW_table_page; + nr_pages_tmp = iobuf->nr_pages; + iobuf->nr_pages = 1; + + if (brw_kiovec(WRITE, 1, &iobuf, snap_phys_dev, + blocks, blksize_snap) != blksize_snap) + goto fail_raw_write; + + + /* initialization of next COW exception table block with zeroes */ + end_of_table = idx % COW_entries_per_pe == COW_entries_per_pe - 1; + if (idx_COW_table % COW_entries_per_block == COW_entries_per_block - 1 || end_of_table) + { + /* 
don't go beyond the end */ + if (idx + 1 >= lv_snap->lv_remap_end) goto good_out; + + memset(lv_COW_table, 0, blksize_snap); + + if (end_of_table) + { + idx++; + snap_phys_dev = lv_snap->lv_block_exception[idx].rdev_new; + snap_pe_start = lv_snap->lv_block_exception[idx - (idx % COW_entries_per_pe)].rsector_new - lv_snap->lv_chunk_size; + blksize_snap = lvm_get_blksize(snap_phys_dev); + blocks[0] = snap_pe_start >> (blksize_snap >> 10); + } else blocks[0]++; + + if (brw_kiovec(WRITE, 1, &iobuf, snap_phys_dev, + blocks, blksize_snap) != blksize_snap) + goto fail_raw_write; + } + + + good_out: + iobuf->length = length_tmp; + iobuf->maplist[0] = page_tmp; + iobuf->nr_pages = nr_pages_tmp; + return 0; + + /* slow path */ + out: + lvm_drop_snapshot(lv_snap, reason); + return 1; + + fail_raw_write: + reason = "write error"; + goto out; +} + /* * copy on write handler for one snapshot logical volume * @@ -200,9 +340,8 @@ int lvm_snapshot_COW(kdev_t org_phys_dev, lv_t * lv_snap) { const char * reason; - unsigned long org_start, snap_start, virt_start, pe_off; + unsigned long org_start, snap_start, snap_phys_dev, virt_start, pe_off; int idx = lv_snap->lv_remap_ptr, chunk_size = lv_snap->lv_chunk_size; - kdev_t snap_phys_dev; struct kiobuf * iobuf; unsigned long blocks[KIO_MAX_SECTORS]; int blksize_snap, blksize_org, min_blksize, max_blksize; @@ -238,8 +377,8 @@ int lvm_snapshot_COW(kdev_t org_phys_dev, iobuf = lv_snap->lv_iobuf; - blksize_org = get_blksize(org_phys_dev); - blksize_snap = get_blksize(snap_phys_dev); + blksize_org = lvm_get_blksize(org_phys_dev); + blksize_snap = lvm_get_blksize(snap_phys_dev); max_blksize = max(blksize_org, blksize_snap); min_blksize = min(blksize_org, blksize_snap); max_sectors = KIO_MAX_SECTORS * (min_blksize>>9); @@ -268,7 +407,7 @@ int lvm_snapshot_COW(kdev_t org_phys_dev, } #ifdef DEBUG_SNAPSHOT - /* invalidate the logcial snapshot buffer cache */ + /* invalidate the logical snapshot buffer cache */ invalidate_snap_cache(virt_start, 
lv_snap->lv_chunk_size, lv_snap->lv_dev); #endif @@ -277,15 +416,20 @@ int lvm_snapshot_COW(kdev_t org_phys_dev, so update the execption table */ lv_snap->lv_block_exception[idx].rdev_org = org_phys_dev; lv_snap->lv_block_exception[idx].rsector_org = org_start; + lvm_hash_link(lv_snap->lv_block_exception + idx, org_phys_dev, org_start, lv_snap); lv_snap->lv_remap_ptr = idx + 1; - return 1; + if (lv_snap->lv_snapshot_use_rate > 0) { + if (lv_snap->lv_remap_ptr * 100 / lv_snap->lv_remap_end >= lv_snap->lv_snapshot_use_rate) + wake_up_interruptible(&lv_snap->lv_snapshot_wait); + } + return 0; /* slow path */ out: lvm_drop_snapshot(lv_snap, reason); - return -1; + return 1; fail_out_of_space: reason = "out of space"; @@ -301,7 +445,7 @@ int lvm_snapshot_COW(kdev_t org_phys_dev, goto out; } -static int lvm_snapshot_alloc_iobuf_pages(struct kiobuf * iobuf, int sectors) +int lvm_snapshot_alloc_iobuf_pages(struct kiobuf * iobuf, int sectors) { int bytes, nr_pages, err, i; @@ -312,33 +456,17 @@ static int lvm_snapshot_alloc_iobuf_pages(struct kiobuf * iobuf, int sectors) goto out; err = -ENOMEM; - iobuf->locked = 1; + iobuf->locked = 0; iobuf->nr_pages = 0; for (i = 0; i < nr_pages; i++) { struct page * page; -#if LINUX_VERSION_CODE >= KERNEL_VERSION(2,3,27) page = alloc_page(GFP_KERNEL); if (!page) goto out; -#else - { - unsigned long addr = __get_free_page(GFP_USER); - if (!addr) - goto out; - iobuf->pagelist[i] = addr; - page = virt_to_page(addr); - } -#endif iobuf->maplist[i] = page; - /* the only point to lock the page here is to be allowed - to share unmap_kiobuf() in the fail-path */ -#ifndef LockPage -#define LockPage(map) set_bit(PG_locked, &(map)->flags) -#endif - LockPage(page); iobuf->nr_pages++; } iobuf->offset = 0; @@ -360,7 +488,7 @@ static int calc_max_buckets(void) return mem; } -static int lvm_snapshot_alloc_hash_table(lv_t * lv) +int lvm_snapshot_alloc_hash_table(lv_t * lv) { int err; unsigned long buckets, max_buckets, size; @@ -380,6 +508,7 @@ static 
int lvm_snapshot_alloc_hash_table(lv_t * lv) if (!hash) goto out; + lv->lv_snapshot_hash_table_size = size; lv->lv_snapshot_hash_mask = buckets-1; while (buckets--) @@ -407,12 +536,20 @@ int lvm_snapshot_alloc(lv_t * lv_snap) err = lvm_snapshot_alloc_hash_table(lv_snap); if (err) goto out_free_kiovec; + + + lv_snap->lv_COW_table_page = alloc_page(GFP_KERNEL); + if (!lv_snap->lv_COW_table_page) + goto out_free_kiovec; + out: return err; out_free_kiovec: unmap_kiobuf(lv_snap->lv_iobuf); free_kiovec(1, &lv_snap->lv_iobuf); + vfree(lv_snap->lv_snapshot_hash_table); + lv_snap->lv_snapshot_hash_table = NULL; goto out; } @@ -427,10 +564,17 @@ void lvm_snapshot_release(lv_t * lv) { vfree(lv->lv_snapshot_hash_table); lv->lv_snapshot_hash_table = NULL; + lv->lv_snapshot_hash_table_size = 0; } if (lv->lv_iobuf) { + unmap_kiobuf(lv->lv_iobuf); free_kiovec(1, &lv->lv_iobuf); lv->lv_iobuf = NULL; } + if (lv->lv_COW_table_page) + { + free_page((ulong)lv->lv_COW_table_page); + lv->lv_COW_table_page = NULL; + } } diff --git a/drivers/md/lvm.c b/drivers/md/lvm.c index f9433232eb9f..ea276c57c95e 100644 --- a/drivers/md/lvm.c +++ b/drivers/md/lvm.c @@ -1,12 +1,12 @@ /* * kernel/lvm.c * - * Copyright (C) 1997 - 2000 Heinz Mauelshagen, Germany + * Copyright (C) 1997 - 2000 Heinz Mauelshagen, Sistina Software * * February-November 1997 * April-May,July-August,November 1998 * January-March,May,July,September,October 1999 - * January,February 2000 + * January,February,July,September-November 2000 * * * LVM driver is free software; you can redistribute it and/or modify @@ -38,7 +38,7 @@ * lvm_status_byindex_req_t vars * 04/05/1998 - added multiple device support * 08/05/1998 - added support to set/clear extendable flag in volume group - * 09/05/1998 - changed output of lvm_proc_get_info() because of + * 09/05/1998 - changed output of lvm_proc_get_global_info() because of * support for free (eg. 
longer) logical volume names * 12/05/1998 - added spin_locks (thanks to Pascal van Dam * ) @@ -122,18 +122,36 @@ * - avoided "/dev/" in proc filesystem output * - avoided inline strings functions lvm_strlen etc. * 14/02/2000 - support for 2.3.43 - * - integrated Andrea Arcangeli's snapshot code + * - integrated Andrea Arcagneli's snapshot code + * 25/06/2000 - james (chip) , IKKHAYD! roffl + * 26/06/2000 - enhanced lv_extend_reduce for snapshot logical volume support + * 06/09/2000 - added devfs support + * 07/09/2000 - changed IOP version to 9 + * - started to add new char ioctl LV_STATUS_BYDEV_T to support + * getting an lv_t based on the dev_t of the Logical Volume + * 14/09/2000 - enhanced lvm_do_lv_create to upcall VFS functions + * to sync and lock, activate snapshot and unlock the FS + * (to support journaled filesystems) + * 18/09/2000 - hardsector size support + * 27/09/2000 - implemented lvm_do_lv_rename() and lvm_do_vg_rename() + * 30/10/2000 - added Andi Kleen's LV_BMAP ioctl to support LILO + * 01/11/2000 - added memory information on hash tables to + * lvm_proc_get_global_info() + * 02/11/2000 - implemented /proc/lvm/ hierarchy * 07/12/2000 - make sure lvm_make_request_fn returns correct value - 0 or 1 - NeilBrown * */ -static char *lvm_version = "LVM version 0.8final by Heinz Mauelshagen (15/02/2000)\n"; -static char *lvm_short_version = "version 0.8final (15/02/2000)"; +static char *lvm_version = "LVM version 0.9 by Heinz Mauelshagen (13/11/2000)\n"; +static char *lvm_short_version = "version 0.9 (13/11/2000)"; #define MAJOR_NR LVM_BLK_MAJOR #define DEVICE_OFF(device) +/* lvm_do_lv_create calls fsync_dev_lockfs()/unlockfs() */ +/* #define LVM_VFS_ENHANCEMENT */ + #include #include @@ -166,17 +184,15 @@ static char *lvm_short_version = "version 0.8final (15/02/2000)"; #include #endif -#define LOCAL_END_REQUEST - #include #include #include #include -#define LVM_CORRECT_READ_AHEAD(a) \ - (((a) < LVM_MIN_READ_AHEAD || (a) > LVM_MAX_READ_AHEAD) \ - ? 
LVM_MAX_READ_AHEAD : (a)) +#define LVM_CORRECT_READ_AHEAD( a) \ + if ( a < LVM_MIN_READ_AHEAD || \ + a > LVM_MAX_READ_AHEAD) a = LVM_MAX_READ_AHEAD; #ifndef WRITEA # define WRITEA WRITE @@ -195,8 +211,7 @@ extern int lvm_init(void); static void lvm_dummy_device_request(request_queue_t *); #define DEVICE_REQUEST lvm_dummy_device_request -static int lvm_make_request_fn(request_queue_t *, int, struct buffer_head*); -static void lvm_plug_device_noop(request_queue_t *, kdev_t); +static int lvm_make_request_fn(request_queue_t*, int, struct buffer_head*); static int lvm_blk_ioctl(struct inode *, struct file *, uint, ulong); static int lvm_blk_open(struct inode *, struct file *); @@ -205,13 +220,21 @@ static int lvm_chr_open(struct inode *, struct file *); static int lvm_chr_close(struct inode *, struct file *); static int lvm_blk_close(struct inode *, struct file *); +static int lvm_user_bmap(struct inode *, struct lv_bmap *); static int lvm_chr_ioctl(struct inode *, struct file *, uint, ulong); #if defined CONFIG_LVM_PROC_FS && defined CONFIG_PROC_FS -static int lvm_proc_get_info(char *, char **, off_t, int); -static int (*lvm_proc_get_info_ptr) (char *, char **, off_t, int) = -&lvm_proc_get_info; +int lvm_proc_read_vg_info(char *, char **, off_t, int, int *, void *); +int lvm_proc_read_lv_info(char *, char **, off_t, int, int *, void *); +int lvm_proc_read_pv_info(char *, char **, off_t, int, int *, void *); +static int lvm_proc_get_global_info(char *, char **, off_t, int, int *, void *); +void lvm_do_create_proc_entry_of_vg ( vg_t *); +inline void lvm_do_remove_proc_entry_of_vg ( vg_t *); +inline void lvm_do_create_proc_entry_of_lv ( vg_t *, lv_t *); +inline void lvm_do_remove_proc_entry_of_lv ( vg_t *, lv_t *); +inline void lvm_do_create_proc_entry_of_pv ( vg_t *, pv_t *); +inline void lvm_do_remove_proc_entry_of_pv ( vg_t *, pv_t *); #endif #ifdef LVM_HD_NAME @@ -226,10 +249,16 @@ void lvm_hd_name(char *, int); static void lvm_init_vars(void); /* external snapshot 
calls */ -int lvm_snapshot_remap_block(kdev_t *, ulong *, ulong, lv_t *); -int lvm_snapshot_COW(kdev_t, ulong, ulong, ulong, lv_t *); -int lvm_snapshot_alloc(lv_t *); -void lvm_snapshot_release(lv_t *); +extern inline int lvm_get_blksize(kdev_t); +extern int lvm_snapshot_alloc(lv_t *); +extern void lvm_snapshot_fill_COW_page(vg_t *, lv_t *); +extern int lvm_snapshot_COW(kdev_t, ulong, ulong, ulong, lv_t *); +extern int lvm_snapshot_remap_block(kdev_t *, ulong *, ulong, lv_t *); +extern void lvm_snapshot_release(lv_t *); +extern int lvm_write_COW_table_block(vg_t *, lv_t *); +extern inline void lvm_hash_link(lv_block_exception_t *, kdev_t, ulong, lv_t *); +extern int lvm_snapshot_alloc_hash_table(lv_t *); +extern void lvm_drop_snapshot(lv_t *, char *); #ifdef LVM_HD_NAME extern void (*lvm_hd_name_ptr) (char *, int); @@ -237,21 +266,30 @@ extern void (*lvm_hd_name_ptr) (char *, int); static int lvm_map(struct buffer_head *, int); static int lvm_do_lock_lvm(void); static int lvm_do_le_remap(vg_t *, void *); -static int lvm_do_pe_lock_unlock(vg_t *r, void *); -static int lvm_do_vg_create(int, void *); -static int lvm_do_vg_extend(vg_t *, void *); -static int lvm_do_vg_reduce(vg_t *, void *); -static int lvm_do_vg_remove(int); + +static int lvm_do_pv_create(pv_t *, vg_t *, ulong); +static int lvm_do_pv_remove(vg_t *, ulong); static int lvm_do_lv_create(int, char *, lv_t *); -static int lvm_do_lv_remove(int, char *, int); static int lvm_do_lv_extend_reduce(int, char *, lv_t *); +static int lvm_do_lv_remove(int, char *, int); +static int lvm_do_lv_rename(vg_t *, lv_req_t *, lv_t *); static int lvm_do_lv_status_byname(vg_t *r, void *); -static int lvm_do_lv_status_byindex(vg_t *, void *arg); +static int lvm_do_lv_status_byindex(vg_t *, void *); +static int lvm_do_lv_status_bydev(vg_t *, void *); + +static int lvm_do_pe_lock_unlock(vg_t *r, void *); + static int lvm_do_pv_change(vg_t*, void*); static int lvm_do_pv_status(vg_t *, void *); + +static int lvm_do_vg_create(int, 
void *); +static int lvm_do_vg_extend(vg_t *, void *); +static int lvm_do_vg_reduce(vg_t *, void *); +static int lvm_do_vg_rename(vg_t *, void *); +static int lvm_do_vg_remove(int); static void lvm_geninit(struct gendisk *); #ifdef LVM_GET_INODE -static struct inode *lvm_get_inode(kdev_t); +static struct inode *lvm_get_inode(int); void lvm_clear_inode(struct inode *); #endif /* END Internal function prototypes */ @@ -259,10 +297,19 @@ void lvm_clear_inode(struct inode *); /* volume group descriptor area pointers */ static vg_t *vg[ABS_MAX_VG]; + +#ifdef CONFIG_DEVFS_FS +static devfs_handle_t lvm_devfs_handle; +static devfs_handle_t vg_devfs_handle[MAX_VG]; +static devfs_handle_t ch_devfs_handle[MAX_VG]; +static devfs_handle_t lv_devfs_handle[MAX_LV]; +#endif + static pv_t *pvp = NULL; static lv_t *lvp = NULL; static pe_t *pep = NULL; static pe_t *pep1 = NULL; +static char *basename = NULL; /* map from block minor number to VG and LV numbers */ @@ -287,7 +334,6 @@ static int lvm_reset_spindown = 0; static char pv_name[NAME_LEN]; /* static char rootvg[NAME_LEN] = { 0, }; */ -static uint lv_open = 0; const char *const lvm_name = LVM_NAME; static int lock = 0; static int loadtime = 0; @@ -299,27 +345,31 @@ static DECLARE_WAIT_QUEUE_HEAD(lvm_wait); static DECLARE_WAIT_QUEUE_HEAD(lvm_map_wait); static spinlock_t lvm_lock = SPIN_LOCK_UNLOCKED; +static spinlock_t lvm_snapshot_lock = SPIN_LOCK_UNLOCKED; -static devfs_handle_t lvm_devfs_handle; -static devfs_handle_t vg_devfs_handle[MAX_VG]; -static devfs_handle_t ch_devfs_handle[MAX_VG]; -static devfs_handle_t lv_devfs_handle[MAX_LV]; +#if defined CONFIG_LVM_PROC_FS && defined CONFIG_PROC_FS +static struct proc_dir_entry *lvm_proc_dir = NULL; +static struct proc_dir_entry *lvm_proc_vg_subdir = NULL; +struct proc_dir_entry *pde = NULL; +#endif static struct file_operations lvm_chr_fops = { - owner: THIS_MODULE, open: lvm_chr_open, release: lvm_chr_close, ioctl: lvm_chr_ioctl, }; +#define BLOCK_DEVICE_OPERATIONS +/* block 
device operations structure needed for 2.3.38? and above */ static struct block_device_operations lvm_blk_dops = { open: lvm_blk_open, release: lvm_blk_close, - ioctl: lvm_blk_ioctl + ioctl: lvm_blk_ioctl, }; + /* gendisk structures */ static struct hd_struct lvm_hd_struct[MAX_LV]; static int lvm_blocksizes[MAX_LV] = @@ -364,21 +414,32 @@ int __init lvm_init(void) printk(KERN_ERR "%s -- register_chrdev failed\n", lvm_name); return -EIO; } - if (register_blkdev(MAJOR_NR, lvm_name, &lvm_blk_dops) < 0) { +#ifdef BLOCK_DEVICE_OPERATIONS + if (register_blkdev(MAJOR_NR, lvm_name, &lvm_blk_dops) < 0) +#else + if (register_blkdev(MAJOR_NR, lvm_name, &lvm_blk_fops) < 0) +#endif + { printk("%s -- register_blkdev failed\n", lvm_name); if (unregister_chrdev(LVM_CHAR_MAJOR, lvm_name) < 0) printk(KERN_ERR "%s -- unregister_chrdev failed\n", lvm_name); return -EIO; } +#ifdef CONFIG_DEVFS_FS lvm_devfs_handle = devfs_register( 0 , "lvm", 0, 0, LVM_CHAR_MAJOR, S_IFCHR | S_IRUSR | S_IWUSR | S_IRGRP, &lvm_chr_fops, NULL); +#endif #if defined CONFIG_LVM_PROC_FS && defined CONFIG_PROC_FS - create_proc_info_entry(LVM_NAME, S_IFREG | S_IRUGO, - &proc_root, lvm_proc_get_info_ptr); + lvm_proc_dir = create_proc_entry (LVM_DIR, S_IFDIR, &proc_root); + if (lvm_proc_dir != NULL) { + lvm_proc_vg_subdir = create_proc_entry (LVM_VG_SUBDIR, S_IFDIR, lvm_proc_dir); + pde = create_proc_entry(LVM_GLOBAL, S_IFREG, lvm_proc_dir); + if ( pde != NULL) pde->read_proc = &lvm_proc_get_global_info; + } #endif lvm_init_vars(); @@ -405,7 +466,7 @@ int __init lvm_init(void) blk_init_queue(BLK_DEFAULT_QUEUE(MAJOR_NR), DEVICE_REQUEST); blk_queue_make_request(BLK_DEFAULT_QUEUE(MAJOR_NR), lvm_make_request_fn); - blk_queue_pluggable(BLK_DEFAULT_QUEUE(MAJOR_NR), lvm_plug_device_noop); + /* optional read root VGDA */ /* if ( *rootvg != 0) vg_read_with_pv_and_lv ( rootvg, &vg); @@ -433,7 +494,9 @@ void cleanup_module(void) { struct gendisk *gendisk_ptr = NULL, *gendisk_ptr_prev = NULL; +#ifdef CONFIG_DEVFS_FS 
devfs_unregister (lvm_devfs_handle); +#endif if (unregister_chrdev(LVM_CHAR_MAJOR, lvm_name) < 0) { printk(KERN_ERR "%s -- unregister_chrdev failed\n", lvm_name); @@ -456,9 +519,12 @@ void cleanup_module(void) blk_size[MAJOR_NR] = NULL; blksize_size[MAJOR_NR] = NULL; + hardsect_size[MAJOR_NR] = NULL; #if defined CONFIG_LVM_PROC_FS && defined CONFIG_PROC_FS - remove_proc_entry(LVM_NAME, &proc_root); + remove_proc_entry(LVM_GLOBAL, lvm_proc_dir); + remove_proc_entry(LVM_VG_SUBDIR, lvm_proc_dir); + remove_proc_entry(LVM_DIR, &proc_root); #endif #ifdef LVM_HD_NAME @@ -486,8 +552,11 @@ void __init lvm_init_vars(void) loadtime = CURRENT_TIME; + lvm_lock = lvm_snapshot_lock = SPIN_LOCK_UNLOCKED; + pe_lock_req.lock = UNLOCK_PE; - pe_lock_req.data.lv_dev = pe_lock_req.data.pv_dev = 0; + pe_lock_req.data.lv_dev = \ + pe_lock_req.data.pv_dev = \ pe_lock_req.data.pv_offset = 0; /* Initialize VG pointers */ @@ -531,6 +600,9 @@ static int lvm_chr_open(struct inode *inode, if (VG_CHR(minor) > MAX_VG) return -ENXIO; lvm_chr_open_count++; + + MOD_INC_USE_COUNT; + return 0; } /* lvm_chr_open() */ @@ -592,7 +664,7 @@ static int lvm_chr_ioctl(struct inode *inode, struct file *file, MOD_INC_USE_COUNT; while (GET_USE_COUNT(&__this_module) > 1) MOD_DEC_USE_COUNT; -#endif /* MODULE */ +#endif /* MODULE */ lock = 0; /* release lock */ wake_up_interruptible(&lvm_wait); return 0; @@ -612,17 +684,21 @@ static int lvm_chr_ioctl(struct inode *inode, struct file *file, /* create a VGDA */ return lvm_do_vg_create(minor, arg); - case VG_REMOVE: - /* remove an inactive VGDA */ - return lvm_do_vg_remove(minor); - case VG_EXTEND: /* extend a volume group */ - return lvm_do_vg_extend(vg_ptr,arg); + return lvm_do_vg_extend(vg_ptr, arg); case VG_REDUCE: /* reduce a volume group */ - return lvm_do_vg_reduce(vg_ptr,arg); + return lvm_do_vg_reduce(vg_ptr, arg); + + case VG_RENAME: + /* rename a volume group */ + return lvm_do_vg_rename(vg_ptr, arg); + + case VG_REMOVE: + /* remove an inactive VGDA */ + 
return lvm_do_vg_remove(minor); case VG_SET_EXTENDABLE: @@ -660,20 +736,22 @@ static int lvm_chr_ioctl(struct inode *inode, struct file *file, /* get volume group count */ for (l = v = 0; v < ABS_MAX_VG; v++) { if (vg[v] != NULL) { - if (copy_to_user(arg + l++ * NAME_LEN, + if (copy_to_user(arg + l * NAME_LEN, vg[v]->vg_name, NAME_LEN) != 0) return -EFAULT; + l++; } } return 0; case LV_CREATE: - case LV_REMOVE: case LV_EXTEND: case LV_REDUCE: - /* create, remove, extend or reduce a logical volume */ + case LV_REMOVE: + case LV_RENAME: + /* create, extend, reduce, remove or rename a logical volume */ if (vg_ptr == NULL) return -ENXIO; if (copy_from_user(&lv_req, arg, sizeof(lv_req)) != 0) return -EFAULT; @@ -686,52 +764,54 @@ static int lvm_chr_ioctl(struct inode *inode, struct file *file, case LV_CREATE: return lvm_do_lv_create(minor, lv_req.lv_name, &lv); - case LV_REMOVE: - return lvm_do_lv_remove(minor, lv_req.lv_name, -1); - case LV_EXTEND: case LV_REDUCE: return lvm_do_lv_extend_reduce(minor, lv_req.lv_name, &lv); + case LV_REMOVE: + return lvm_do_lv_remove(minor, lv_req.lv_name, -1); + + case LV_RENAME: + return lvm_do_lv_rename(vg_ptr, &lv_req, &lv); } + + case LV_STATUS_BYNAME: /* get status of a logical volume by name */ - return lvm_do_lv_status_byname(vg_ptr,arg); + return lvm_do_lv_status_byname(vg_ptr, arg); + case LV_STATUS_BYINDEX: /* get status of a logical volume by index */ - return lvm_do_lv_status_byindex(vg_ptr,arg); + return lvm_do_lv_status_byindex(vg_ptr, arg); + + + case LV_STATUS_BYDEV: + return lvm_do_lv_status_bydev(vg_ptr, arg); + case PV_CHANGE: /* change a physical volume */ return lvm_do_pv_change(vg_ptr,arg); + case PV_STATUS: /* get physical volume data (pv_t structure only) */ return lvm_do_pv_status(vg_ptr,arg); + case PV_FLUSH: /* physical volume buffer flush/invalidate */ if (copy_from_user(&pv_flush_req, arg, sizeof(pv_flush_req)) != 0) return -EFAULT; - for ( v = 0; v < ABS_MAX_VG; v++) { - unsigned int p; - if ( vg[v] == 
NULL) continue; - for ( p = 0; p < vg[v]->pv_max; p++) { - if ( vg[v]->pv[p] != NULL && - strcmp ( vg[v]->pv[p]->pv_name, - pv_flush_req.pv_name) == 0) { - fsync_dev ( vg[v]->pv[p]->pv_dev); - invalidate_buffers ( vg[v]->pv[p]->pv_dev); - return 0; - } - } - } + fsync_dev(pv_flush_req.pv_dev); + invalidate_buffers(pv_flush_req.pv_dev); return 0; + default: printk(KERN_WARNING "%s -- lvm_chr_ioctl: unknown command %x\n", @@ -754,11 +834,10 @@ static int lvm_chr_close(struct inode *inode, struct file *file) "%s -- lvm_chr_close VG#: %d\n", lvm_name, VG_CHR(minor)); #endif - lock_kernel(); #ifdef LVM_TOTAL_RESET if (lvm_reset_spindown > 0) { lvm_reset_spindown = 0; - lvm_chr_open_count = 1; + lvm_chr_open_count = 0; } #endif @@ -767,7 +846,8 @@ static int lvm_chr_close(struct inode *inode, struct file *file) lock = 0; /* release lock */ wake_up_interruptible(&lvm_wait); } - unlock_kernel(); + + MOD_DEC_USE_COUNT; return 0; } /* lvm_chr_close() */ @@ -815,6 +895,10 @@ static int lvm_blk_open(struct inode *inode, struct file *file) if (!(lv_ptr->lv_access & LV_WRITE)) return -EACCES; } +#ifndef BLOCK_DEVICE_OPERATIONS + file->f_op = &lvm_blk_fops; +#endif + /* be sure to increment VG counter */ if (lv_ptr->lv_open == 0) vg_ptr->lv_open++; lv_ptr->lv_open++; @@ -863,7 +947,7 @@ static int lvm_blk_ioctl(struct inode *inode, struct file *file, lvm_name, lv_ptr->lv_size); #endif if (put_user(lv_ptr->lv_size, (long *)arg)) - return -EFAULT; + return -EFAULT; break; @@ -892,7 +976,7 @@ static int lvm_blk_ioctl(struct inode *inode, struct file *file, if ((long) arg < LVM_MIN_READ_AHEAD || (long) arg > LVM_MAX_READ_AHEAD) return -EINVAL; - read_ahead[MAJOR_NR] = lv_ptr->lv_read_ahead = (long) arg; + lv_ptr->lv_read_ahead = (long) arg; break; @@ -944,6 +1028,10 @@ static int lvm_blk_ioctl(struct inode *inode, struct file *file, /* set access flags of a logical volume */ if (!capable(CAP_SYS_ADMIN)) return -EACCES; lv_ptr->lv_access = (ulong) arg; + if ( lv_ptr->lv_access & 
LV_WRITE) + set_device_ro(lv_ptr->lv_dev, 0); + else + set_device_ro(lv_ptr->lv_dev, 1); break; @@ -955,6 +1043,10 @@ static int lvm_blk_ioctl(struct inode *inode, struct file *file, lv_ptr->lv_status = (ulong) arg; break; + case LV_BMAP: + /* turn logical block into (dev_t, block). non privileged. */ + return lvm_user_bmap(inode, (struct lv_bmap *) arg); + break; case LV_SET_ALLOCATION: /* set allocation flags of a logical volume */ @@ -962,6 +1054,37 @@ static int lvm_blk_ioctl(struct inode *inode, struct file *file, lv_ptr->lv_allocation = (ulong) arg; break; + case LV_SNAPSHOT_USE_RATE: + if (!(lv_ptr->lv_access & LV_SNAPSHOT)) return -EPERM; + { + lv_snapshot_use_rate_req_t lv_snapshot_use_rate_req; + + if (copy_from_user(&lv_snapshot_use_rate_req, arg, + sizeof(lv_snapshot_use_rate_req_t))) + return -EFAULT; + if (lv_snapshot_use_rate_req.rate < 0 || + lv_snapshot_use_rate_req.rate > 100) return -EFAULT; + + switch (lv_snapshot_use_rate_req.block) + { + case 0: + lv_ptr->lv_snapshot_use_rate = lv_snapshot_use_rate_req.rate; + if (lv_ptr->lv_remap_ptr * 100 / lv_ptr->lv_remap_end < lv_ptr->lv_snapshot_use_rate) + interruptible_sleep_on (&lv_ptr->lv_snapshot_wait); + break; + + case O_NONBLOCK: + break; + + default: + return -EFAULT; + } + lv_snapshot_use_rate_req.rate = lv_ptr->lv_remap_ptr * 100 / lv_ptr->lv_remap_end; + if (copy_to_user(arg, &lv_snapshot_use_rate_req, + sizeof(lv_snapshot_use_rate_req_t))) + return -EFAULT; + } + break; default: printk(KERN_WARNING @@ -999,20 +1122,163 @@ static int lvm_blk_close(struct inode *inode, struct file *file) } /* lvm_blk_close() */ +static int lvm_user_bmap(struct inode *inode, struct lv_bmap *user_result) +{ + struct buffer_head bh; + unsigned long block; + int err; + + if (get_user(block, &user_result->lv_block)) + return -EFAULT; + + memset(&bh,0,sizeof bh); + bh.b_rsector = block; + bh.b_dev = bh.b_rdev = inode->i_dev; + bh.b_size = lvm_get_blksize(bh.b_dev); + if ((err=lvm_map(&bh, READ)) < 0) { + printk("lvm 
map failed: %d\n", err); + return -EINVAL; + } + + return put_user( kdev_t_to_nr(bh.b_rdev), &user_result->lv_dev) || + put_user(bh.b_rsector, &user_result->lv_block) ? -EFAULT : 0; +} + + +/* + * provide VG info for proc filesystem use (global) + */ +int lvm_vg_info(vg_t *vg_ptr, char *buf) { + int sz = 0; + char inactive_flag = ' '; + + if (!(vg_ptr->vg_status & VG_ACTIVE)) inactive_flag = 'I'; + sz = sprintf(buf, + "\nVG: %c%s [%d PV, %d LV/%d open] " + " PE Size: %d KB\n" + " Usage [KB/PE]: %d /%d total " + "%d /%d used %d /%d free", + inactive_flag, + vg_ptr->vg_name, + vg_ptr->pv_cur, + vg_ptr->lv_cur, + vg_ptr->lv_open, + vg_ptr->pe_size >> 1, + vg_ptr->pe_size * vg_ptr->pe_total >> 1, + vg_ptr->pe_total, + vg_ptr->pe_allocated * vg_ptr->pe_size >> 1, + vg_ptr->pe_allocated, + (vg_ptr->pe_total - vg_ptr->pe_allocated) * + vg_ptr->pe_size >> 1, + vg_ptr->pe_total - vg_ptr->pe_allocated); + return sz; +} + + +/* + * provide LV info for proc filesystem use (global) + */ +int lvm_lv_info(vg_t *vg_ptr, lv_t *lv_ptr, char *buf) { + int sz = 0; + char inactive_flag = 'A', allocation_flag = ' ', + stripes_flag = ' ', rw_flag = ' '; + + if (!(lv_ptr->lv_status & LV_ACTIVE)) + inactive_flag = 'I'; + rw_flag = 'R'; + if (lv_ptr->lv_access & LV_WRITE) + rw_flag = 'W'; + allocation_flag = 'D'; + if (lv_ptr->lv_allocation & LV_CONTIGUOUS) + allocation_flag = 'C'; + stripes_flag = 'L'; + if (lv_ptr->lv_stripes > 1) + stripes_flag = 'S'; + sz += sprintf(buf+sz, + "[%c%c%c%c", + inactive_flag, + rw_flag, + allocation_flag, + stripes_flag); + if (lv_ptr->lv_stripes > 1) + sz += sprintf(buf+sz, "%-2d", + lv_ptr->lv_stripes); + else + sz += sprintf(buf+sz, " "); + basename = strrchr(lv_ptr->lv_name, '/'); + if ( basename == 0) basename = lv_ptr->lv_name; + else basename++; + sz += sprintf(buf+sz, "] %-25s", basename); + if (strlen(basename) > 25) + sz += sprintf(buf+sz, + "\n "); + sz += sprintf(buf+sz, "%9d /%-6d ", + lv_ptr->lv_size >> 1, + lv_ptr->lv_size / vg_ptr->pe_size); 
+ + if (lv_ptr->lv_open == 0) + sz += sprintf(buf+sz, "close"); + else + sz += sprintf(buf+sz, "%dx open", + lv_ptr->lv_open); + + return sz; +} + + +/* + * provide PV info for proc filesystem use (global) + */ +int lvm_pv_info(pv_t *pv_ptr, char *buf) { + int sz = 0; + char inactive_flag = 'A', allocation_flag = ' '; + char *pv_name = NULL; + + if (!(pv_ptr->pv_status & PV_ACTIVE)) + inactive_flag = 'I'; + allocation_flag = 'A'; + if (!(pv_ptr->pv_allocatable & PV_ALLOCATABLE)) + allocation_flag = 'N'; + pv_name = strrchr(pv_ptr->pv_name+1,'/'); + if ( pv_name == 0) pv_name = pv_ptr->pv_name; + else pv_name++; + sz = sprintf(buf, + "[%c%c] %-21s %8d /%-6d " + "%8d /%-6d %8d /%-6d", + inactive_flag, + allocation_flag, + pv_name, + pv_ptr->pe_total * + pv_ptr->pe_size >> 1, + pv_ptr->pe_total, + pv_ptr->pe_allocated * + pv_ptr->pe_size >> 1, + pv_ptr->pe_allocated, + (pv_ptr->pe_total - + pv_ptr->pe_allocated) * + pv_ptr->pe_size >> 1, + pv_ptr->pe_total - + pv_ptr->pe_allocated); + return sz; +} + + #if defined CONFIG_LVM_PROC_FS && defined CONFIG_PROC_FS /* - * Support function /proc-Filesystem + * Support functions /proc-Filesystem */ + #define LVM_PROC_BUF ( i == 0 ? 
dummy_buf : &buf[sz]) -static int lvm_proc_get_info(char *page, char **start, off_t pos, int count) +/* + * provide global LVM information + */ +static int lvm_proc_get_global_info(char *page, char **start, off_t pos, int count, int *eof, void *data) { int c, i, l, p, v, vg_counter, pv_counter, lv_counter, lv_open_counter, - lv_open_total, pe_t_bytes, lv_block_exception_t_bytes, seconds; + lv_open_total, pe_t_bytes, hash_table_bytes, lv_block_exception_t_bytes, seconds; static off_t sz; off_t sz_last; - char allocation_flag, inactive_flag, rw_flag, stripes_flag; - char *lv_name, *pv_name; static char *buf = NULL; static char dummy_buf[160]; /* sized for 2 lines */ vg_t *vg_ptr; @@ -1022,13 +1288,16 @@ static int lvm_proc_get_info(char *page, char **start, off_t pos, int count) #ifdef DEBUG_LVM_PROC_GET_INFO printk(KERN_DEBUG - "%s - lvm_proc_get_info CALLED pos: %lu count: %d whence: %d\n", + "%s - lvm_proc_get_global_info CALLED pos: %lu count: %d whence: %d\n", lvm_name, pos, count, whence); #endif + MOD_INC_USE_COUNT; + if (pos == 0 || buf == NULL) { sz_last = vg_counter = pv_counter = lv_counter = lv_open_counter = \ - lv_open_total = pe_t_bytes = lv_block_exception_t_bytes = 0; + lv_open_total = pe_t_bytes = hash_table_bytes = \ + lv_block_exception_t_bytes = 0; /* search for activity */ for (v = 0; v < ABS_MAX_VG; v++) { @@ -1040,6 +1309,7 @@ static int lvm_proc_get_info(char *page, char **start, off_t pos, int count) for (l = 0; l < vg[v]->lv_max; l++) { if ((lv_ptr = vg_ptr->lv[l]) != NULL) { pe_t_bytes += lv_ptr->lv_allocated_le; + hash_table_bytes += lv_ptr->lv_snapshot_hash_table_size; if (lv_ptr->lv_block_exception != NULL) lv_block_exception_t_bytes += lv_ptr->lv_remap_end; if (lv_ptr->lv_open > 0) { @@ -1057,9 +1327,11 @@ static int lvm_proc_get_info(char *page, char **start, off_t pos, int count) if (buf != NULL) { #ifdef DEBUG_KFREE printk(KERN_DEBUG - "%s -- kfree %d\n", lvm_name, __LINE__); + "%s -- vfree %d\n", lvm_name, __LINE__); #endif - 
kfree(buf); + lock_kernel(); + vfree(buf); + unlock_kernel(); buf = NULL; } /* 2 times: first to get size to allocate buffer, @@ -1094,7 +1366,7 @@ static int lvm_proc_get_info(char *page, char **start, off_t pos, int count) vg_counter * sizeof(vg_t) + pv_counter * sizeof(pv_t) + lv_counter * sizeof(lv_t) + - pe_t_bytes + lv_block_exception_t_bytes + sz_last, + pe_t_bytes + hash_table_bytes + lv_block_exception_t_bytes + sz_last, lvm_iop_version); seconds = CURRENT_TIME - loadtime; @@ -1115,26 +1387,7 @@ static int lvm_proc_get_info(char *page, char **start, off_t pos, int count) for (v = 0; v < ABS_MAX_VG; v++) { /* volume group */ if ((vg_ptr = vg[v]) != NULL) { - inactive_flag = ' '; - if (!(vg_ptr->vg_status & VG_ACTIVE)) inactive_flag = 'I'; - sz += sprintf(LVM_PROC_BUF, - "\nVG: %c%s [%d PV, %d LV/%d open] " - " PE Size: %d KB\n" - " Usage [KB/PE]: %d /%d total " - "%d /%d used %d /%d free", - inactive_flag, - vg_ptr->vg_name, - vg_ptr->pv_cur, - vg_ptr->lv_cur, - vg_ptr->lv_open, - vg_ptr->pe_size >> 1, - vg_ptr->pe_size * vg_ptr->pe_total >> 1, - vg_ptr->pe_total, - vg_ptr->pe_allocated * vg_ptr->pe_size >> 1, - vg_ptr->pe_allocated, - (vg_ptr->pe_total - vg_ptr->pe_allocated) * - vg_ptr->pe_size >> 1, - vg_ptr->pe_total - vg_ptr->pe_allocated); + sz += lvm_vg_info(vg_ptr, LVM_PROC_BUF); /* physical volumes */ sz += sprintf(LVM_PROC_BUF, @@ -1143,32 +1396,8 @@ static int lvm_proc_get_info(char *page, char **start, off_t pos, int count) c = 0; for (p = 0; p < vg_ptr->pv_max; p++) { if ((pv_ptr = vg_ptr->pv[p]) != NULL) { - inactive_flag = 'A'; - if (!(pv_ptr->pv_status & PV_ACTIVE)) - inactive_flag = 'I'; - allocation_flag = 'A'; - if (!(pv_ptr->pv_allocatable & PV_ALLOCATABLE)) - allocation_flag = 'N'; - pv_name = strchr(pv_ptr->pv_name+1,'/'); - if ( pv_name == 0) pv_name = pv_ptr->pv_name; - else pv_name++; - sz += sprintf(LVM_PROC_BUF, - "[%c%c] %-21s %8d /%-6d " - "%8d /%-6d %8d /%-6d", - inactive_flag, - allocation_flag, - pv_name, - pv_ptr->pe_total * 
- pv_ptr->pe_size >> 1, - pv_ptr->pe_total, - pv_ptr->pe_allocated * - pv_ptr->pe_size >> 1, - pv_ptr->pe_allocated, - (pv_ptr->pe_total - - pv_ptr->pe_allocated) * - pv_ptr->pe_size >> 1, - pv_ptr->pe_total - - pv_ptr->pe_allocated); + sz += lvm_pv_info(pv_ptr, LVM_PROC_BUF); + c++; if (c < vg_ptr->pv_cur) sz += sprintf(LVM_PROC_BUF, @@ -1181,47 +1410,9 @@ static int lvm_proc_get_info(char *page, char **start, off_t pos, int count) "\n LV%s ", vg_ptr->lv_cur == 1 ? ": " : "s:"); c = 0; - for (l = 0; l < vg[v]->lv_max; l++) { + for (l = 0; l < vg_ptr->lv_max; l++) { if ((lv_ptr = vg_ptr->lv[l]) != NULL) { - inactive_flag = 'A'; - if (!(lv_ptr->lv_status & LV_ACTIVE)) - inactive_flag = 'I'; - rw_flag = 'R'; - if (lv_ptr->lv_access & LV_WRITE) - rw_flag = 'W'; - allocation_flag = 'D'; - if (lv_ptr->lv_allocation & LV_CONTIGUOUS) - allocation_flag = 'C'; - stripes_flag = 'L'; - if (lv_ptr->lv_stripes > 1) - stripes_flag = 'S'; - sz += sprintf(LVM_PROC_BUF, - "[%c%c%c%c", - inactive_flag, - rw_flag, - allocation_flag, - stripes_flag); - if (lv_ptr->lv_stripes > 1) - sz += sprintf(LVM_PROC_BUF, "%-2d", - lv_ptr->lv_stripes); - else - sz += sprintf(LVM_PROC_BUF, " "); - lv_name = strrchr(lv_ptr->lv_name, '/'); - if ( lv_name == 0) lv_name = lv_ptr->lv_name; - else lv_name++; - sz += sprintf(LVM_PROC_BUF, "] %-25s", lv_name); - if (strlen(lv_name) > 25) - sz += sprintf(LVM_PROC_BUF, - "\n "); - sz += sprintf(LVM_PROC_BUF, "%9d /%-6d ", - lv_ptr->lv_size >> 1, - lv_ptr->lv_size / vg[v]->pe_size); - - if (lv_ptr->lv_open == 0) - sz += sprintf(LVM_PROC_BUF, "close"); - else - sz += sprintf(LVM_PROC_BUF, "%dx open", - lv_ptr->lv_open); + sz += lvm_lv_info(vg_ptr, lv_ptr, LVM_PROC_BUF); c++; if (c < vg_ptr->lv_cur) sz += sprintf(LVM_PROC_BUF, @@ -1234,8 +1425,12 @@ static int lvm_proc_get_info(char *page, char **start, off_t pos, int count) } } if (buf == NULL) { - if ((buf = vmalloc(sz)) == NULL) { + lock_kernel(); + buf = vmalloc(sz); + unlock_kernel(); + if (buf == NULL) { 
sz = 0; + MOD_DEC_USE_COUNT; return sprintf(page, "%s - vmalloc error at line %d\n", lvm_name, __LINE__); } @@ -1243,8 +1438,11 @@ static int lvm_proc_get_info(char *page, char **start, off_t pos, int count) sz_last = sz; } } + MOD_DEC_USE_COUNT; if (pos > sz - 1) { + lock_kernel(); vfree(buf); + unlock_kernel(); buf = NULL; return 0; } @@ -1253,47 +1451,111 @@ static int lvm_proc_get_info(char *page, char **start, off_t pos, int count) return sz - pos; else return count; -} /* lvm_proc_get_info() */ +} /* lvm_proc_get_global_info() */ #endif /* #if defined CONFIG_LVM_PROC_FS && defined CONFIG_PROC_FS */ +/* + * provide VG information + */ +int lvm_proc_read_vg_info(char *page, char **start, off_t off, + int count, int *eof, void *data) { + int sz = 0; + vg_t *vg = data; + + sz += sprintf ( page+sz, "name: %s\n", vg->vg_name); + sz += sprintf ( page+sz, "size: %u\n", + vg->pe_total * vg->pe_size / 2); + sz += sprintf ( page+sz, "access: %u\n", vg->vg_access); + sz += sprintf ( page+sz, "status: %u\n", vg->vg_status); + sz += sprintf ( page+sz, "number: %u\n", vg->vg_number); + sz += sprintf ( page+sz, "LV max: %u\n", vg->lv_max); + sz += sprintf ( page+sz, "LV current: %u\n", vg->lv_cur); + sz += sprintf ( page+sz, "LV open: %u\n", vg->lv_open); + sz += sprintf ( page+sz, "PV max: %u\n", vg->pv_max); + sz += sprintf ( page+sz, "PV current: %u\n", vg->pv_cur); + sz += sprintf ( page+sz, "PV active: %u\n", vg->pv_act); + sz += sprintf ( page+sz, "PE size: %u\n", vg->pe_size / 2); + sz += sprintf ( page+sz, "PE total: %u\n", vg->pe_total); + sz += sprintf ( page+sz, "PE allocated: %u\n", vg->pe_allocated); + sz += sprintf ( page+sz, "uuid: %s\n", vg->vg_uuid); + + return sz; +} + + +/* + * provide LV information + */ +int lvm_proc_read_lv_info(char *page, char **start, off_t off, + int count, int *eof, void *data) { + int sz = 0; + lv_t *lv = data; + + sz += sprintf ( page+sz, "name: %s\n", lv->lv_name); + sz += sprintf ( page+sz, "size: %u\n", lv->lv_size); + sz += 
sprintf ( page+sz, "access: %u\n", lv->lv_access); + sz += sprintf ( page+sz, "status: %u\n", lv->lv_status); + sz += sprintf ( page+sz, "number: %u\n", lv->lv_number); + sz += sprintf ( page+sz, "open: %u\n", lv->lv_open); + sz += sprintf ( page+sz, "allocation: %u\n", lv->lv_allocation); + sz += sprintf ( page+sz, "device: %02u:%02u\n", + MAJOR(lv->lv_dev), MINOR(lv->lv_dev)); + + return sz; +} + + +/* + * provide PV information + */ +int lvm_proc_read_pv_info(char *page, char **start, off_t off, + int count, int *eof, void *data) { + int sz = 0; + pv_t *pv = data; + + sz += sprintf ( page+sz, "name: %s\n", pv->pv_name); + sz += sprintf ( page+sz, "size: %u\n", pv->pv_size); + sz += sprintf ( page+sz, "status: %u\n", pv->pv_status); + sz += sprintf ( page+sz, "number: %u\n", pv->pv_number); + sz += sprintf ( page+sz, "allocatable: %u\n", pv->pv_allocatable); + sz += sprintf ( page+sz, "LV current: %u\n", pv->lv_cur); + sz += sprintf ( page+sz, "PE size: %u\n", pv->pe_size / 2); + sz += sprintf ( page+sz, "PE total: %u\n", pv->pe_total); + sz += sprintf ( page+sz, "PE allocated: %u\n", pv->pe_allocated); + sz += sprintf ( page+sz, "device: %02u:%02u\n", + MAJOR(pv->pv_dev), MINOR(pv->pv_dev)); + sz += sprintf ( page+sz, "uuid: %s\n", pv->pv_uuid); + + + return sz; +} + + /* * block device support function for /usr/src/linux/drivers/block/ll_rw_blk.c * (see init_module/lvm_init) */ static int lvm_map(struct buffer_head *bh, int rw) { - int minor = MINOR(bh->b_rdev); + int minor = MINOR(bh->b_dev); + int ret = 0; ulong index; ulong pe_start; ulong size = bh->b_size >> 9; - ulong rsector_tmp = bh->b_rsector; + ulong rsector_tmp = bh->b_blocknr * size; ulong rsector_sav; - kdev_t rdev_tmp = bh->b_rdev; + kdev_t rdev_tmp = bh->b_dev; kdev_t rdev_sav; - lv_t *lv = vg[VG_BLK(minor)]->lv[LV_BLK(minor)]; + vg_t *vg_this = vg[VG_BLK(minor)]; + lv_t *lv = vg_this->lv[LV_BLK(minor)]; if (!(lv->lv_status & LV_ACTIVE)) { printk(KERN_ALERT "%s - lvm_map: ll_rw_blk for inactive 
LV %s\n", lvm_name, lv->lv_name); - goto error; - } -/* - if ( lv->lv_access & LV_SNAPSHOT) - printk ( "%s -- %02d:%02d block: %lu rw: %d\n", lvm_name, MAJOR ( bh->b_dev), MINOR ( bh->b_dev), bh->b_blocknr, rw); - */ - - /* take care of snapshot chunk writes before - check for writable logical volume */ - if ((lv->lv_access & LV_SNAPSHOT) && - MAJOR(bh->b_rdev) != 0 && - MAJOR(bh->b_rdev) != MAJOR_NR && - (rw == WRITEA || rw == WRITE)) - { - printk ( "%s -- doing snapshot write for %02d:%02d[%02d:%02d] b_blocknr: %lu b_rsector: %lu\n", lvm_name, MAJOR ( bh->b_dev), MINOR ( bh->b_dev), MAJOR ( bh->b_rdev), MINOR ( bh->b_rdev), bh->b_blocknr, bh->b_rsector); - goto error; + return -1; } if ((rw == WRITE || rw == WRITEA) && @@ -1301,7 +1563,7 @@ static int lvm_map(struct buffer_head *bh, int rw) printk(KERN_CRIT "%s - lvm_map: ll_rw_blk write for readonly LV %s\n", lvm_name, lv->lv_name); - goto error; + return -1; } #ifdef DEBUG_MAP printk(KERN_DEBUG @@ -1315,9 +1577,10 @@ static int lvm_map(struct buffer_head *bh, int rw) if (rsector_tmp + size > lv->lv_size) { printk(KERN_ALERT - "%s - lvm_map *rsector: %lu or size: %lu wrong for" - " minor: %2d\n", lvm_name, rsector_tmp, size, minor); - goto error; + "%s - lvm_map access beyond end of device; *rsector: " + "%lu or size: %lu wrong for minor: %2d\n", + lvm_name, rsector_tmp, size, minor); + return -1; } rsector_sav = rsector_tmp; rdev_sav = rdev_tmp; @@ -1326,10 +1589,10 @@ lvm_second_remap: /* linear mapping */ if (lv->lv_stripes < 2) { /* get the index */ - index = rsector_tmp / vg[VG_BLK(minor)]->pe_size; + index = rsector_tmp / vg_this->pe_size; pe_start = lv->lv_current_pe[index].pe; rsector_tmp = lv->lv_current_pe[index].pe + - (rsector_tmp % vg[VG_BLK(minor)]->pe_size); + (rsector_tmp % vg_this->pe_size); rdev_tmp = lv->lv_current_pe[index].dev; #ifdef DEBUG_MAP @@ -1347,7 +1610,7 @@ lvm_second_remap: ulong stripe_index; ulong stripe_length; - stripe_length = vg[VG_BLK(minor)]->pe_size * lv->lv_stripes; + 
stripe_length = vg_this->pe_size * lv->lv_stripes; stripe_index = (rsector_tmp % stripe_length) / lv->lv_stripesize; index = rsector_tmp / stripe_length + (stripe_index % lv->lv_stripes) * @@ -1379,7 +1642,7 @@ lvm_second_remap: if (rdev_tmp == pe_lock_req.data.pv_dev && rsector_tmp >= pe_lock_req.data.pv_offset && rsector_tmp < (pe_lock_req.data.pv_offset + - vg[VG_BLK(minor)]->pe_size)) { + vg_this->pe_size)) { sleep_on(&lvm_map_wait); rsector_tmp = rsector_sav; rdev_tmp = rdev_sav; @@ -1393,7 +1656,7 @@ lvm_second_remap: lv->lv_current_pe[index].reads++; /* snapshot volume exception handling on physical device address base */ - if (lv->lv_access & (LV_SNAPSHOT | LV_SNAPSHOT_ORG)) { + if (lv->lv_access & (LV_SNAPSHOT|LV_SNAPSHOT_ORG)) { /* original logical volume */ if (lv->lv_access & LV_SNAPSHOT_ORG) { if (rw == WRITE || rw == WRITEA) @@ -1404,6 +1667,8 @@ lvm_second_remap: for (lv_ptr = lv->lv_snapshot_next; lv_ptr != NULL; lv_ptr = lv_ptr->lv_snapshot_next) { + /* Check for inactive snapshot */ + if (!(lv_ptr->lv_status & LV_ACTIVE)) continue; down(&lv->lv_snapshot_org->lv_snapshot_sem); /* do we still have exception storage for this snapshot free? 
*/ if (lv_ptr->lv_block_exception != NULL) { @@ -1414,11 +1679,13 @@ lvm_second_remap: pe_start, lv_ptr)) { /* create a new mapping */ - lvm_snapshot_COW(rdev_tmp, - rsector_tmp, - pe_start, - rsector_sav, - lv_ptr); + if (!(ret = lvm_snapshot_COW(rdev_tmp, + rsector_tmp, + pe_start, + rsector_sav, + lv_ptr))) + ret = lvm_write_COW_table_block(vg_this, + lv_ptr); } rdev_tmp = rdev_sav; rsector_tmp = rsector_sav; @@ -1437,11 +1704,7 @@ lvm_second_remap: bh->b_rdev = rdev_tmp; bh->b_rsector = rsector_tmp; - return 1; - - error: - buffer_IO_error(bh); - return -1; + return ret; } /* lvm_map() */ @@ -1487,7 +1750,9 @@ static void lvm_dummy_device_request(request_queue_t * t) /* * make request function */ -static int lvm_make_request_fn(request_queue_t *q, int rw, struct buffer_head *bh) +static int lvm_make_request_fn(request_queue_t *q, + int rw, + struct buffer_head *bh) { if (lvm_map(bh, rw)<0) return 0; /* failure, buffer_IO_error has been called, don't recurse */ @@ -1495,12 +1760,6 @@ static int lvm_make_request_fn(request_queue_t *q, int rw, struct buffer_head *b return 1; /* all ok, mapping done, call lower level driver */ } -/* - * plug device function is a noop because plugging has to happen - * in the queue of the physical blockdevice to allow the - * elevator to do a better job. 
- */ -static void lvm_plug_device_noop(request_queue_t *q, kdev_t dev) { } /******************************************************************** * @@ -1563,7 +1822,8 @@ static int lvm_do_pe_lock_unlock(vg_t *vg_ptr, void *arg) case UNLOCK_PE: pe_lock_req.lock = UNLOCK_PE; - pe_lock_req.data.lv_dev = pe_lock_req.data.pv_dev = 0; + pe_lock_req.data.lv_dev = \ + pe_lock_req.data.pv_dev = \ pe_lock_req.data.pv_offset = 0; wake_up(&lvm_map_wait); break; @@ -1593,8 +1853,7 @@ static int lvm_do_le_remap(vg_t *vg_ptr, void *arg) if (lv_ptr != NULL && strcmp(lv_ptr->lv_name, le_remap_req.lv_name) == 0) { - for (le = 0; le < lv_ptr->lv_allocated_le; - le++) { + for (le = 0; le < lv_ptr->lv_allocated_le; le++) { if (lv_ptr->lv_current_pe[le].dev == le_remap_req.old_dev && lv_ptr->lv_current_pe[le].pe == @@ -1618,12 +1877,11 @@ static int lvm_do_le_remap(vg_t *vg_ptr, void *arg) */ int lvm_do_vg_create(int minor, void *arg) { - int snaporg_minor = 0; - ulong l, p; + int ret = 0; + ulong l, ls = 0, p, size; lv_t lv; vg_t *vg_ptr; - pv_t *pv_ptr; - lv_t *lv_ptr; + lv_t **snap_lv_ptr; if (vg[VG_CHR(minor)] != NULL) return -EPERM; @@ -1639,18 +1897,11 @@ int lvm_do_vg_create(int minor, void *arg) return -EFAULT; } - vg_devfs_handle[vg_ptr->vg_number] = devfs_mk_dir(0, vg_ptr->vg_name, NULL); - ch_devfs_handle[vg_ptr->vg_number] = devfs_register( - vg_devfs_handle[vg_ptr->vg_number] , "group", - DEVFS_FL_DEFAULT, LVM_CHAR_MAJOR, vg_ptr->vg_number, - S_IFCHR | S_IRUSR | S_IWUSR | S_IRGRP, - &lvm_chr_fops, NULL); - /* we are not that active so far... 
*/ vg_ptr->vg_status &= ~VG_ACTIVE; vg[VG_CHR(minor)] = vg_ptr; - vg[VG_CHR(minor)]->pe_allocated = 0; + if (vg_ptr->pv_max > ABS_MAX_PV) { printk(KERN_WARNING "%s -- Can't activate VG: ABS_MAX_PV too small\n", @@ -1667,38 +1918,30 @@ int lvm_do_vg_create(int minor, void *arg) vg_ptr = NULL; return -EPERM; } + /* get the physical volume structures */ vg_ptr->pv_act = vg_ptr->pv_cur = 0; for (p = 0; p < vg_ptr->pv_max; p++) { /* user space address */ if ((pvp = vg_ptr->pv[p]) != NULL) { - pv_ptr = vg_ptr->pv[p] = kmalloc(sizeof(pv_t),GFP_KERNEL); - if (pv_ptr == NULL) { - printk(KERN_CRIT - "%s -- VG_CREATE: kmalloc error PV at line %d\n", - lvm_name, __LINE__); + ret = lvm_do_pv_create(pvp, vg_ptr, p); + if ( ret != 0) { lvm_do_vg_remove(minor); - return -ENOMEM; - } - if (copy_from_user(pv_ptr, pvp, sizeof(pv_t)) != 0) { - lvm_do_vg_remove(minor); - return -EFAULT; + return ret; } - /* We don't need the PE list - in kernel space as with LVs pe_t list (see below) */ - pv_ptr->pe = NULL; - pv_ptr->pe_allocated = 0; - pv_ptr->pv_status = PV_ACTIVE; - vg_ptr->pv_act++; - vg_ptr->pv_cur++; - -#ifdef LVM_GET_INODE - /* insert a dummy inode for fs_may_mount */ - pv_ptr->inode = lvm_get_inode(pv_ptr->pv_dev); -#endif } } + size = vg_ptr->lv_max * sizeof(lv_t *); + if ((snap_lv_ptr = vmalloc ( size)) == NULL) { + printk(KERN_CRIT + "%s -- VG_CREATE: vmalloc error snapshot LVs at line %d\n", + lvm_name, __LINE__); + lvm_do_vg_remove(minor); + return -EFAULT; + } + memset(snap_lv_ptr, 0, size); + /* get the logical volume structures */ vg_ptr->lv_cur = 0; for (l = 0; l < vg_ptr->lv_max; l++) { @@ -1708,7 +1951,14 @@ int lvm_do_vg_create(int minor, void *arg) lvm_do_vg_remove(minor); return -EFAULT; } + if ( lv.lv_access & LV_SNAPSHOT) { + snap_lv_ptr[ls] = lvp; + vg_ptr->lv[l] = NULL; + ls++; + continue; + } vg_ptr->lv[l] = NULL; + /* only create original logical volumes for now */ if (lvm_do_lv_create(minor, lv.lv_name, &lv) != 0) { lvm_do_vg_remove(minor); return -EFAULT; 
@@ -1718,55 +1968,41 @@ int lvm_do_vg_create(int minor, void *arg) /* Second path to correct snapshot logical volumes which are not in place during first path above */ - for (l = 0; l < vg_ptr->lv_max; l++) { - if ((lv_ptr = vg_ptr->lv[l]) != NULL && - vg_ptr->lv[l]->lv_access & LV_SNAPSHOT) { - snaporg_minor = lv_ptr->lv_snapshot_minor; - if (vg_ptr->lv[LV_BLK(snaporg_minor)] != NULL) { - /* get pointer to original logical volume */ - lv_ptr = vg_ptr->lv[l]->lv_snapshot_org = - vg_ptr->lv[LV_BLK(snaporg_minor)]; - - /* set necessary fields of original logical volume */ - lv_ptr->lv_access |= LV_SNAPSHOT_ORG; - lv_ptr->lv_snapshot_minor = 0; - lv_ptr->lv_snapshot_org = lv_ptr; - lv_ptr->lv_snapshot_prev = NULL; + for (l = 0; l < ls; l++) { + lvp = snap_lv_ptr[l]; + if (copy_from_user(&lv, lvp, sizeof(lv_t)) != 0) { + lvm_do_vg_remove(minor); + return -EFAULT; + } + if (lvm_do_lv_create(minor, lv.lv_name, &lv) != 0) { + lvm_do_vg_remove(minor); + return -EFAULT; + } + } - /* find last snapshot logical volume in the chain */ - while (lv_ptr->lv_snapshot_next != NULL) - lv_ptr = lv_ptr->lv_snapshot_next; +#ifdef CONFIG_DEVFS_FS + vg_devfs_handle[vg_ptr->vg_number] = devfs_mk_dir(0, vg_ptr->vg_name, NULL); + ch_devfs_handle[vg_ptr->vg_number] = devfs_register( + vg_devfs_handle[vg_ptr->vg_number] , "group", + DEVFS_FL_DEFAULT, LVM_CHAR_MAJOR, vg_ptr->vg_number, + S_IFCHR | S_IRUSR | S_IWUSR | S_IRGRP, + &lvm_chr_fops, NULL); +#endif - /* set back pointer to this last one in our new logical volume */ - vg_ptr->lv[l]->lv_snapshot_prev = lv_ptr; +#if defined CONFIG_LVM_PROC_FS && defined CONFIG_PROC_FS + lvm_do_create_proc_entry_of_vg ( vg_ptr); +#endif - /* last logical volume now points to our new snapshot volume */ - lv_ptr->lv_snapshot_next = vg_ptr->lv[l]; + vfree(snap_lv_ptr); - /* now point to the new one */ - lv_ptr = lv_ptr->lv_snapshot_next; + vg_count++; - /* set necessary fields of new snapshot logical volume */ - lv_ptr->lv_snapshot_next = NULL; - 
lv_ptr->lv_current_pe = - vg_ptr->lv[LV_BLK(snaporg_minor)]->lv_current_pe; - lv_ptr->lv_allocated_le = - vg_ptr->lv[LV_BLK(snaporg_minor)]->lv_allocated_le; - lv_ptr->lv_current_le = - vg_ptr->lv[LV_BLK(snaporg_minor)]->lv_current_le; - lv_ptr->lv_size = - vg_ptr->lv[LV_BLK(snaporg_minor)]->lv_size; - } - } - } - vg_count++; + MOD_INC_USE_COUNT; /* let's go active */ vg_ptr->vg_status |= VG_ACTIVE; - MOD_INC_USE_COUNT; - return 0; } /* lvm_do_vg_create() */ @@ -1776,26 +2012,18 @@ int lvm_do_vg_create(int minor, void *arg) */ static int lvm_do_vg_extend(vg_t *vg_ptr, void *arg) { + int ret = 0; uint p; pv_t *pv_ptr; if (vg_ptr == NULL) return -ENXIO; if (vg_ptr->pv_cur < vg_ptr->pv_max) { for (p = 0; p < vg_ptr->pv_max; p++) { - if (vg_ptr->pv[p] == NULL) { - if ((pv_ptr = vg_ptr->pv[p] = kmalloc(sizeof(pv_t),GFP_KERNEL)) == NULL) { - printk(KERN_CRIT - "%s -- VG_EXTEND: kmalloc error PV at line %d\n", - lvm_name, __LINE__); - return -ENOMEM; - } - if (copy_from_user(pv_ptr, arg, sizeof(pv_t)) != 0) { - kfree(pv_ptr); - vg_ptr->pv[p] = NULL; - return -EFAULT; - } + if ( ( pv_ptr = vg_ptr->pv[p]) == NULL) { + ret = lvm_do_pv_create(arg, vg_ptr, p); + lvm_do_create_proc_entry_of_pv ( vg_ptr, pv_ptr); + if ( ret != 0) return ret; - pv_ptr->pv_status = PV_ACTIVE; /* We don't need the PE list in kernel space like LVs pe_t list */ pv_ptr->pe = NULL; @@ -1818,8 +2046,7 @@ return -EPERM; /* * character device support function VGDA reduce */ -static int lvm_do_vg_reduce(vg_t *vg_ptr, void *arg) -{ +static int lvm_do_vg_reduce(vg_t *vg_ptr, void *arg) { uint p; pv_t *pv_ptr; @@ -1837,10 +2064,7 @@ static int lvm_do_vg_reduce(vg_t *vg_ptr, void *arg) pv_ptr->pe_total; vg_ptr->pv_cur--; vg_ptr->pv_act--; -#ifdef LVM_GET_INODE - lvm_clear_inode(pv_ptr->inode); -#endif - kfree(pv_ptr); + lvm_do_pv_remove(vg_ptr, p); /* Make PV pointer array contiguous */ for (; p < vg_ptr->pv_max - 1; p++) vg_ptr->pv[p] = vg_ptr->pv[p + 1]; @@ -1852,6 +2076,53 @@ static int 
lvm_do_vg_reduce(vg_t *vg_ptr, void *arg) } /* lvm_do_vg_reduce */ +/* + * character device support function VG rename + */ +static int lvm_do_vg_rename(vg_t *vg_ptr, void *arg) +{ + int l = 0, p = 0, len = 0; + char vg_name[NAME_LEN] = { 0,}; + char lv_name[NAME_LEN] = { 0,}; + char *ptr = NULL; + lv_t *lv_ptr = NULL; + pv_t *pv_ptr = NULL; + + if (copy_from_user(vg_name, arg, sizeof(vg_name)) != 0) + return -EFAULT; + +#if defined CONFIG_LVM_PROC_FS && defined CONFIG_PROC_FS + lvm_do_remove_proc_entry_of_vg ( vg_ptr); +#endif + + strncpy ( vg_ptr->vg_name, vg_name, sizeof ( vg_name)-1); + for ( l = 0; l < vg_ptr->lv_max; l++) + { + if ((lv_ptr = vg_ptr->lv[l]) == NULL) continue; + strncpy(lv_ptr->vg_name, vg_name, sizeof ( vg_name)); + ptr = strrchr(lv_ptr->lv_name, '/'); + if (ptr == NULL) ptr = lv_ptr->lv_name; + strncpy(lv_name, ptr, sizeof ( lv_name)); + len = sizeof(LVM_DIR_PREFIX); + strcpy(lv_ptr->lv_name, LVM_DIR_PREFIX); + strncat(lv_ptr->lv_name, vg_name, NAME_LEN - len); + len += strlen ( vg_name); + strncat(lv_ptr->lv_name, lv_name, NAME_LEN - len); + } + for ( p = 0; p < vg_ptr->pv_max; p++) + { + if ( (pv_ptr = vg_ptr->pv[p]) == NULL) continue; + strncpy(pv_ptr->vg_name, vg_name, NAME_LEN); + } + +#if defined CONFIG_LVM_PROC_FS && defined CONFIG_PROC_FS + lvm_do_create_proc_entry_of_vg ( vg_ptr); +#endif + + return 0; +} /* lvm_do_vg_rename */ + + /* * character device support function VGDA remove */ @@ -1873,9 +2144,6 @@ static int lvm_do_vg_remove(int minor) /* let's go inactive */ vg_ptr->vg_status &= ~VG_ACTIVE; - devfs_unregister (ch_devfs_handle[vg_ptr->vg_number]); - devfs_unregister (vg_devfs_handle[vg_ptr->vg_number]); - /* free LVs */ /* first free snapshot logical volumes */ for (i = 0; i < vg_ptr->lv_max; i++) { @@ -1902,17 +2170,23 @@ static int lvm_do_vg_remove(int minor) printk(KERN_DEBUG "%s -- kfree %d\n", lvm_name, __LINE__); #endif -#ifdef LVM_GET_INODE - lvm_clear_inode(pv_ptr->inode); -#endif - kfree(pv_ptr); - 
vg[VG_CHR(minor)]->pv[i] = NULL; + lvm_do_pv_remove(vg_ptr, i); } } +#ifdef CONFIG_DEVFS_FS + devfs_unregister (ch_devfs_handle[vg_ptr->vg_number]); + devfs_unregister (vg_devfs_handle[vg_ptr->vg_number]); +#endif + +#if defined CONFIG_LVM_PROC_FS && defined CONFIG_PROC_FS + lvm_do_remove_proc_entry_of_vg ( vg_ptr); +#endif + #ifdef DEBUG_KFREE printk(KERN_DEBUG "%s -- kfree %d\n", lvm_name, __LINE__); #endif + kfree(vg_ptr); vg[VG_CHR(minor)] = NULL; @@ -1924,14 +2198,69 @@ static int lvm_do_vg_remove(int minor) } /* lvm_do_vg_remove() */ +/* + * character device support function physical volume create + */ +static int lvm_do_pv_create(pv_t *pvp, vg_t *vg_ptr, ulong p) { + pv_t *pv_ptr = NULL; + + pv_ptr = vg_ptr->pv[p] = kmalloc(sizeof(pv_t),GFP_KERNEL); + if (pv_ptr == NULL) { + printk(KERN_CRIT + "%s -- VG_CREATE: kmalloc error PV at line %d\n", + lvm_name, __LINE__); + return -ENOMEM; + } + if (copy_from_user(pv_ptr, pvp, sizeof(pv_t)) != 0) { + return -EFAULT; + } + /* We don't need the PE list + in kernel space as with LVs pe_t list (see below) */ + pv_ptr->pe = NULL; + pv_ptr->pe_allocated = 0; + pv_ptr->pv_status = PV_ACTIVE; + vg_ptr->pv_act++; + vg_ptr->pv_cur++; + +#ifdef LVM_GET_INODE + /* insert a dummy inode for fs_may_mount */ + pv_ptr->inode = lvm_get_inode(pv_ptr->pv_dev); +#endif + + return 0; +} /* lvm_do_pv_create() */ + + +/* + * character device support function physical volume create + */ +static int lvm_do_pv_remove(vg_t *vg_ptr, ulong p) { + pv_t *pv_ptr = vg_ptr->pv[p]; + +#if defined CONFIG_LVM_PROC_FS && defined CONFIG_PROC_FS + lvm_do_remove_proc_entry_of_pv ( vg_ptr, pv_ptr); +#endif + vg_ptr->pe_total -= + pv_ptr->pe_total; + vg_ptr->pv_cur--; + vg_ptr->pv_act--; +#ifdef LVM_GET_INODE + lvm_clear_inode(pv_ptr->inode); +#endif + kfree(pv_ptr); + vg_ptr->pv[p] = NULL; + + return 0; +} + + /* * character device support function logical volume create */ static int lvm_do_lv_create(int minor, char *lv_name, lv_t *lv) { - int l, le, l_new, 
p, size; + int e, ret, l, le, l_new, p, size; ulong lv_status_save; - char *lv_tmp, *lv_buf = NULL; lv_block_exception_t *lvbe = lv->lv_block_exception; vg_t *vg_ptr = vg[VG_CHR(minor)]; lv_t *lv_ptr = NULL; @@ -1946,7 +2275,7 @@ static int lvm_do_lv_create(int minor, char *lv_name, lv_t *lv) return -EEXIST; } - /* in case of lv_remove(), lv_create() pair; for eg. lvrename does this */ + /* in case of lv_remove(), lv_create() pair */ l_new = -1; if (vg_ptr->lv[lv->lv_number] == NULL) l_new = lv->lv_number; @@ -1957,7 +2286,7 @@ static int lvm_do_lv_create(int minor, char *lv_name, lv_t *lv) } } if (l_new == -1) return -EPERM; - else l = l_new; + else l = l_new; if ((lv_ptr = kmalloc(sizeof(lv_t),GFP_KERNEL)) == NULL) {; printk(KERN_CRIT "%s -- LV_CREATE: kmalloc error LV at line %d\n", @@ -1970,10 +2299,16 @@ static int lvm_do_lv_create(int minor, char *lv_name, lv_t *lv) lv_status_save = lv_ptr->lv_status; lv_ptr->lv_status &= ~LV_ACTIVE; lv_ptr->lv_snapshot_org = \ - lv_ptr->lv_snapshot_prev = \ - lv_ptr->lv_snapshot_next = NULL; + lv_ptr->lv_snapshot_prev = \ + lv_ptr->lv_snapshot_next = NULL; lv_ptr->lv_block_exception = NULL; + lv_ptr->lv_iobuf = NULL; + lv_ptr->lv_snapshot_hash_table = NULL; + lv_ptr->lv_snapshot_hash_table_size = 0; + lv_ptr->lv_snapshot_hash_mask = 0; + lv_ptr->lv_COW_table_page = NULL; init_MUTEX(&lv_ptr->lv_snapshot_sem); + lv_ptr->lv_snapshot_use_rate = 0; vg_ptr->lv[l] = lv_ptr; /* get the PE structures from user space if this @@ -2032,7 +2367,7 @@ static int lvm_do_lv_create(int minor, char *lv_name, lv_t *lv) vg[VG_CHR(minor)]->lv[l] = NULL; return -EFAULT; } - /* get pointer to original logical volume */ + /* point to the original logical volume */ lv_ptr = lv_ptr->lv_snapshot_org; lv_ptr->lv_snapshot_minor = 0; @@ -2043,7 +2378,8 @@ static int lvm_do_lv_create(int minor, char *lv_name, lv_t *lv) lv_ptr = lv_ptr->lv_snapshot_next; /* now lv_ptr points to the last existing snapshot in the chain */ vg_ptr->lv[l]->lv_snapshot_prev = 
lv_ptr; - /* our new one now back points to the previous last in the chain */ + /* our new one now back points to the previous last in the chain + which can be the original logical volume */ lv_ptr = vg_ptr->lv[l]; /* now lv_ptr points to our new last snapshot logical volume */ lv_ptr->lv_snapshot_org = lv_ptr->lv_snapshot_prev->lv_snapshot_org; @@ -2054,16 +2390,19 @@ static int lvm_do_lv_create(int minor, char *lv_name, lv_t *lv) lv_ptr->lv_size = lv_ptr->lv_snapshot_org->lv_size; lv_ptr->lv_stripes = lv_ptr->lv_snapshot_org->lv_stripes; lv_ptr->lv_stripesize = lv_ptr->lv_snapshot_org->lv_stripesize; + if ((ret = lvm_snapshot_alloc(lv_ptr)) != 0) { - int err = lvm_snapshot_alloc(lv_ptr); - if (err) - { - vfree(lv_ptr->lv_block_exception); - kfree(lv_ptr); - vg[VG_CHR(minor)]->lv[l] = NULL; - return err; - } + vfree(lv_ptr->lv_block_exception); + kfree(lv_ptr); + vg[VG_CHR(minor)]->lv[l] = NULL; + return ret; } + for ( e = 0; e < lv_ptr->lv_remap_ptr; e++) + lvm_hash_link (lv_ptr->lv_block_exception + e, lv_ptr->lv_block_exception[e].rdev_org, lv_ptr->lv_block_exception[e].rsector_org, lv_ptr); + /* need to fill the COW exception table data + into the page for disk i/o */ + lvm_snapshot_fill_COW_page(vg_ptr, lv_ptr); + init_waitqueue_head(&lv_ptr->lv_snapshot_wait); } else { vfree(lv_ptr->lv_block_exception); kfree(lv_ptr); @@ -2083,12 +2422,15 @@ static int lvm_do_lv_create(int minor, char *lv_name, lv_t *lv) lvm_size[MINOR(lv_ptr->lv_dev)] = lv_ptr->lv_size >> 1; vg_lv_map[MINOR(lv_ptr->lv_dev)].vg_number = vg_ptr->vg_number; vg_lv_map[MINOR(lv_ptr->lv_dev)].lv_number = lv_ptr->lv_number; - read_ahead[MAJOR_NR] = lv_ptr->lv_read_ahead = LVM_CORRECT_READ_AHEAD(lv_ptr->lv_read_ahead); + LVM_CORRECT_READ_AHEAD(lv_ptr->lv_read_ahead); vg_ptr->lv_cur++; lv_ptr->lv_status = lv_status_save; - strtok(lv->lv_name, "/"); /* /dev */ +#ifdef CONFIG_DEVFS_FS + { + char *lv_tmp, *lv_buf = NULL; + strtok(lv->lv_name, "/"); /* /dev */ while((lv_tmp = strtok(NULL, "/")) != NULL) 
lv_buf = lv_tmp; @@ -2097,15 +2439,43 @@ static int lvm_do_lv_create(int minor, char *lv_name, lv_t *lv) DEVFS_FL_DEFAULT, LVM_BLK_MAJOR, lv->lv_number, S_IFBLK | S_IRUSR | S_IWUSR | S_IRGRP, &lvm_blk_dops, NULL); + } +#endif + +#if defined CONFIG_LVM_PROC_FS && defined CONFIG_PROC_FS + lvm_do_create_proc_entry_of_lv ( vg_ptr, lv_ptr); +#endif /* optionally add our new snapshot LV */ if (lv_ptr->lv_access & LV_SNAPSHOT) { /* sync the original logical volume */ fsync_dev(lv_ptr->lv_snapshot_org->lv_dev); +#ifdef LVM_VFS_ENHANCEMENT + /* VFS function call to sync and lock the filesystem */ + fsync_dev_lockfs(lv_ptr->lv_snapshot_org->lv_dev); +#endif + lv_ptr->lv_snapshot_org->lv_access |= LV_SNAPSHOT_ORG; + lv_ptr->lv_access &= ~LV_SNAPSHOT_ORG; /* put ourselve into the chain */ lv_ptr->lv_snapshot_prev->lv_snapshot_next = lv_ptr; - lv_ptr->lv_snapshot_org->lv_access |= LV_SNAPSHOT_ORG; } + + /* activate the logical volume */ + lv_ptr->lv_status |= LV_ACTIVE; + if ( lv_ptr->lv_access & LV_WRITE) + set_device_ro(lv_ptr->lv_dev, 0); + else + set_device_ro(lv_ptr->lv_dev, 1); + +#ifdef LVM_VFS_ENHANCEMENT +/* VFS function call to unlock the filesystem */ + if (lv_ptr->lv_access & LV_SNAPSHOT) { + unlockfs(lv_ptr->lv_snapshot_org->lv_dev); + } +#endif + + lv_ptr->vg = vg_ptr; + return 0; } /* lvm_do_lv_create() */ @@ -2176,7 +2546,7 @@ static int lvm_do_lv_remove(int minor, char *lv_name, int l) } } vfree(lv_ptr->lv_current_pe); - /* LV_SNAPSHOT */ + /* LV_SNAPSHOT */ } else { /* remove this snapshot logical volume from the chain */ lv_ptr->lv_snapshot_prev->lv_snapshot_next = lv_ptr->lv_snapshot_next; @@ -2190,7 +2560,13 @@ static int lvm_do_lv_remove(int minor, char *lv_name, int l) lvm_snapshot_release(lv_ptr); } +#ifdef CONFIG_DEVFS_FS devfs_unregister(lv_devfs_handle[lv_ptr->lv_number]); +#endif + +#if defined CONFIG_LVM_PROC_FS && defined CONFIG_PROC_FS + lvm_do_remove_proc_entry_of_lv ( vg_ptr, lv_ptr); +#endif #ifdef DEBUG_KFREE printk(KERN_DEBUG "%s -- kfree 
%d\n", lvm_name, __LINE__); @@ -2207,8 +2583,7 @@ static int lvm_do_lv_remove(int minor, char *lv_name, int l) */ static int lvm_do_lv_extend_reduce(int minor, char *lv_name, lv_t *lv) { - int l, le, p, size, old_allocated_le; - uint32_t end, lv_status_save; + ulong end, l, le, p, size, old_allocated_le; vg_t *vg_ptr = vg[VG_CHR(minor)]; lv_t *lv_ptr; pe_t *pe; @@ -2224,12 +2599,75 @@ static int lvm_do_lv_extend_reduce(int minor, char *lv_name, lv_t *lv) lv_ptr = vg_ptr->lv[l]; /* check for active snapshot */ - if (lv->lv_access & (LV_SNAPSHOT | LV_SNAPSHOT_ORG)) return -EPERM; + if (lv->lv_access & LV_SNAPSHOT) + { + ulong e; + lv_block_exception_t *lvbe, *lvbe_old; + struct list_head * lvs_hash_table_old; + + if (lv->lv_block_exception == NULL) return -ENXIO; + size = lv->lv_remap_end * sizeof ( lv_block_exception_t); + if ((lvbe = vmalloc(size)) == NULL) + { + printk(KERN_CRIT + "%s -- lvm_do_lv_extend_reduce: vmalloc error LV_BLOCK_EXCEPTION " + "of %lu Byte at line %d\n", + lvm_name, size, __LINE__); + return -ENOMEM; + } + if (lv->lv_remap_end > lv_ptr->lv_remap_end) + { + if (copy_from_user(lvbe, lv->lv_block_exception, size)) + { + vfree(lvbe); + return -EFAULT; + } + } + + lvbe_old = lv_ptr->lv_block_exception; + lvs_hash_table_old = lv_ptr->lv_snapshot_hash_table; + + /* we need to play on the safe side here... */ + down(&lv_ptr->lv_snapshot_org->lv_snapshot_sem); + if (lv_ptr->lv_block_exception == NULL || + lv_ptr->lv_remap_ptr > lv_ptr->lv_remap_end) + { + up(&lv_ptr->lv_snapshot_org->lv_snapshot_sem); + vfree(lvbe); + return -EPERM; + } + memcpy(lvbe, + lv_ptr->lv_block_exception, + (lv->lv_remap_end > lv_ptr->lv_remap_end ? 
lv_ptr->lv_remap_ptr : lv->lv_remap_end) * sizeof(lv_block_exception_t)); + + lv_ptr->lv_block_exception = lvbe; + lv_ptr->lv_remap_end = lv->lv_remap_end; + if (lvm_snapshot_alloc_hash_table(lv_ptr) != 0) + { + lvm_drop_snapshot(lv_ptr, "hash_alloc"); + up(&lv_ptr->lv_snapshot_org->lv_snapshot_sem); + vfree(lvbe_old); + vfree(lvs_hash_table_old); + return 1; + } + + for (e = 0; e < lv_ptr->lv_remap_ptr; e++) + lvm_hash_link (lv_ptr->lv_block_exception + e, lv_ptr->lv_block_exception[e].rdev_org, lv_ptr->lv_block_exception[e].rsector_org, lv_ptr); + + up(&lv_ptr->lv_snapshot_org->lv_snapshot_sem); + + vfree(lvbe_old); + vfree(lvs_hash_table_old); + + return 0; + } + + /* we drop in here in case it is an original logical volume */ if ((pe = vmalloc(size = lv->lv_current_le * sizeof(pe_t))) == NULL) { printk(KERN_CRIT "%s -- lvm_do_lv_extend_reduce: vmalloc error LV_CURRENT_PE " - "of %d Byte at line %d\n", + "of %lu Byte at line %d\n", lvm_name, size, __LINE__); return -ENOMEM; } @@ -2248,11 +2686,6 @@ static int lvm_do_lv_extend_reduce(int minor, char *lv_name, lv_t *lv) vg_ptr->vg_name); #endif - lv_ptr->lv_status |= LV_SPINDOWN; - fsync_dev(lv_ptr->lv_dev); - lv_ptr->lv_status &= ~LV_ACTIVE; - invalidate_buffers(lv_ptr->lv_dev); - /* reduce allocation counters on PV(s) */ for (le = 0; le < lv_ptr->lv_allocated_le; le++) { vg_ptr->pe_allocated--; @@ -2270,19 +2703,29 @@ static int lvm_do_lv_extend_reduce(int minor, char *lv_name, lv_t *lv) pep1 = lv_ptr->lv_current_pe; end = lv_ptr->lv_current_le; - /* save open counter */ - lv_open = lv_ptr->lv_open; + /* save open counter... 
*/ + lv->lv_open = lv_ptr->lv_open; + lv->lv_snapshot_prev = lv_ptr->lv_snapshot_prev; + lv->lv_snapshot_next = lv_ptr->lv_snapshot_next; + lv->lv_snapshot_org = lv_ptr->lv_snapshot_org; + + lv->lv_current_pe = pe; /* save # of old allocated logical extents */ old_allocated_le = lv_ptr->lv_allocated_le; + /* in case of shrinking -> let's flush */ + if ( end > lv->lv_current_le) fsync_dev(lv_ptr->lv_dev); + /* copy preloaded LV */ - lv_status_save = lv->lv_status; - lv->lv_status |= LV_SPINDOWN; - lv->lv_status &= ~LV_ACTIVE; memcpy((char *) lv_ptr, (char *) lv, sizeof(lv_t)); - lv_ptr->lv_current_pe = pe; - lv_ptr->lv_open = lv_open; + + lvm_gendisk.part[MINOR(lv_ptr->lv_dev)].start_sect = 0; + lvm_gendisk.part[MINOR(lv_ptr->lv_dev)].nr_sects = lv_ptr->lv_size; + lvm_size[MINOR(lv_ptr->lv_dev)] = lv_ptr->lv_size >> 1; + /* vg_lv_map array doesn't have to be changed here */ + + LVM_CORRECT_READ_AHEAD(lv_ptr->lv_read_ahead); /* save availiable i/o statistic data */ /* linear logical volume */ @@ -2290,8 +2733,8 @@ static int lvm_do_lv_extend_reduce(int minor, char *lv_name, lv_t *lv) /* Check what last LE shall be used */ if (end > lv_ptr->lv_current_le) end = lv_ptr->lv_current_le; for (le = 0; le < end; le++) { - lv_ptr->lv_current_pe[le].reads = pep1[le].reads; - lv_ptr->lv_current_pe[le].writes = pep1[le].writes; + lv_ptr->lv_current_pe[le].reads += pep1[le].reads; + lv_ptr->lv_current_pe[le].writes += pep1[le].writes; } /* striped logical volume */ } else { @@ -2304,38 +2747,44 @@ static int lvm_do_lv_extend_reduce(int minor, char *lv_name, lv_t *lv) for (i = source = dest = 0; i < lv_ptr->lv_stripes; i++) { for (j = 0; j < end; j++) { - lv_ptr->lv_current_pe[dest + j].reads = + lv_ptr->lv_current_pe[dest + j].reads += pep1[source + j].reads; - lv_ptr->lv_current_pe[dest + j].writes = + lv_ptr->lv_current_pe[dest + j].writes += pep1[source + j].writes; } source += old_stripe_size; dest += new_stripe_size; } } - vfree(pep1); - pep1 = NULL; - /* extend the PE 
count in PVs */ for (le = 0; le < lv_ptr->lv_allocated_le; le++) { vg_ptr->pe_allocated++; for (p = 0; p < vg_ptr->pv_cur; p++) { if (vg_ptr->pv[p]->pv_dev == - vg_ptr->lv[l]->lv_current_pe[le].dev) { + lv_ptr->lv_current_pe[le].dev) { vg_ptr->pv[p]->pe_allocated++; break; } } } - lvm_gendisk.part[MINOR(lv_ptr->lv_dev)].start_sect = 0; - lvm_gendisk.part[MINOR(lv_ptr->lv_dev)].nr_sects = lv_ptr->lv_size; - lvm_size[MINOR(lv_ptr->lv_dev)] = lv_ptr->lv_size >> 1; - /* vg_lv_map array doesn't have to be changed here */ + vfree ( pep1); + pep1 = NULL; - read_ahead[MAJOR_NR] = lv_ptr->lv_read_ahead = LVM_CORRECT_READ_AHEAD(lv_ptr->lv_read_ahead); - lv_ptr->lv_status = lv_status_save; + if (lv->lv_access & LV_SNAPSHOT_ORG) + { + /* Correct the snapshot size information */ + while ((lv_ptr = lv_ptr->lv_snapshot_next) != NULL) + { + lv_ptr->lv_current_pe = lv_ptr->lv_snapshot_org->lv_current_pe; + lv_ptr->lv_allocated_le = lv_ptr->lv_snapshot_org->lv_allocated_le; + lv_ptr->lv_current_le = lv_ptr->lv_snapshot_org->lv_current_le; + lv_ptr->lv_size = lv_ptr->lv_snapshot_org->lv_size; + lvm_gendisk.part[MINOR(lv_ptr->lv_dev)].nr_sects = lv_ptr->lv_size; + lvm_size[MINOR(lv_ptr->lv_dev)] = lv_ptr->lv_size >> 1; + } + } return 0; } /* lvm_do_lv_extend_reduce() */ @@ -2424,6 +2873,65 @@ static int lvm_do_lv_status_byindex(vg_t *vg_ptr,void *arg) } /* lvm_do_lv_status_byindex() */ +/* + * character device support function logical volume status by device number + */ +static int lvm_do_lv_status_bydev(vg_t * vg_ptr, void * arg) { + int l; + lv_status_bydev_req_t lv_status_bydev_req; + + if (vg_ptr == NULL) return -ENXIO; + if (copy_from_user(&lv_status_bydev_req, arg, + sizeof(lv_status_bydev_req)) != 0) + return -EFAULT; + + for ( l = 0; l < vg_ptr->lv_max; l++) { + if ( vg_ptr->lv[l] == NULL) continue; + if ( vg_ptr->lv[l]->lv_dev == lv_status_bydev_req.dev) break; + } + + if ( l == vg_ptr->lv_max) return -ENXIO; + + if (copy_to_user(lv_status_bydev_req.lv, + vg_ptr->lv[l], 
sizeof(lv_t)) != 0) + return -EFAULT; + + return 0; +} /* lvm_do_lv_status_bydev() */ + + +/* + * character device support function rename a logical volume + */ +static int lvm_do_lv_rename(vg_t *vg_ptr, lv_req_t *lv_req, lv_t *lv) +{ + int l = 0; + int ret = 0; + lv_t *lv_ptr = NULL; + + for (l = 0; l < vg_ptr->lv_max; l++) + { + if ( (lv_ptr = vg_ptr->lv[l]) == NULL) continue; + if (lv_ptr->lv_dev == lv->lv_dev) + { +#if defined CONFIG_LVM_PROC_FS && defined CONFIG_PROC_FS + lvm_do_remove_proc_entry_of_lv ( vg_ptr, lv_ptr); +#endif + strncpy(lv_ptr->lv_name, + lv_req->lv_name, + NAME_LEN); +#if defined CONFIG_LVM_PROC_FS && defined CONFIG_PROC_FS + lvm_do_create_proc_entry_of_lv ( vg_ptr, lv_ptr); +#endif + break; + } + } + if (l == vg_ptr->lv_max) ret = -ENODEV; + + return ret; +} /* lvm_do_lv_rename */ + + /* * character device support function physical volume change */ @@ -2494,6 +3002,140 @@ static int lvm_do_pv_status(vg_t *vg_ptr, void *arg) } /* lvm_do_pv_status() */ + +/* + * create a /proc entry for a logical volume + */ +inline void lvm_do_create_proc_entry_of_lv ( vg_t *vg_ptr, lv_t *lv_ptr) { + char *basename; + + if ( vg_ptr->lv_subdir_pde != NULL) { + basename = strrchr(lv_ptr->lv_name, '/'); + if (basename == NULL) basename = lv_ptr->lv_name; + else basename++; + pde = create_proc_entry(basename, S_IFREG, + vg_ptr->lv_subdir_pde); + if ( pde != NULL) { + pde->read_proc = lvm_proc_read_lv_info; + pde->data = lv_ptr; + } + } +} + + +/* + * remove a /proc entry for a logical volume + */ +inline void lvm_do_remove_proc_entry_of_lv ( vg_t *vg_ptr, lv_t *lv_ptr) { + char *basename; + + if ( vg_ptr->lv_subdir_pde != NULL) { + basename = strrchr(lv_ptr->lv_name, '/'); + if (basename == NULL) basename = lv_ptr->lv_name; + else basename++; + remove_proc_entry(basename, vg_ptr->lv_subdir_pde); + } +} + + +/* + * create a /proc entry for a physical volume + */ +inline void lvm_do_create_proc_entry_of_pv ( vg_t *vg_ptr, pv_t *pv_ptr) { + char *basename; + + 
basename = strrchr(pv_ptr->pv_name, '/'); + if (basename == NULL) basename = pv_ptr->pv_name; + else basename++; + pde = create_proc_entry(basename, S_IFREG, vg_ptr->pv_subdir_pde); + if ( pde != NULL) { + pde->read_proc = lvm_proc_read_pv_info; + pde->data = pv_ptr; + } +} + + +/* + * remove a /proc entry for a physical volume + */ +inline void lvm_do_remove_proc_entry_of_pv ( vg_t *vg_ptr, pv_t *pv_ptr) { + char *basename; + + basename = strrchr(pv_ptr->pv_name, '/'); + if ( vg_ptr->pv_subdir_pde != NULL) { + basename = strrchr(pv_ptr->pv_name, '/'); + if (basename == NULL) basename = pv_ptr->pv_name; + else basename++; + remove_proc_entry(basename, vg_ptr->pv_subdir_pde); + } +} + + +/* + * create a /proc entry for a volume group + */ +#if defined CONFIG_LVM_PROC_FS && defined CONFIG_PROC_FS +void lvm_do_create_proc_entry_of_vg ( vg_t *vg_ptr) { + int l, p; + pv_t *pv_ptr; + lv_t *lv_ptr; + + pde = create_proc_entry(vg_ptr->vg_name, S_IFDIR, + lvm_proc_vg_subdir); + if ( pde != NULL) { + vg_ptr->vg_dir_pde = pde; + pde = create_proc_entry("group", S_IFREG, + vg_ptr->vg_dir_pde); + if ( pde != NULL) { + pde->read_proc = lvm_proc_read_vg_info; + pde->data = vg_ptr; + } + vg_ptr->lv_subdir_pde = + create_proc_entry(LVM_LV_SUBDIR, S_IFDIR, + vg_ptr->vg_dir_pde); + vg_ptr->pv_subdir_pde = + create_proc_entry(LVM_PV_SUBDIR, S_IFDIR, + vg_ptr->vg_dir_pde); + } + + if ( vg_ptr->pv_subdir_pde != NULL) { + for ( l = 0; l < vg_ptr->lv_max; l++) { + if ( ( lv_ptr = vg_ptr->lv[l]) == NULL) continue; + lvm_do_create_proc_entry_of_lv ( vg_ptr, lv_ptr); + } + for ( p = 0; p < vg_ptr->pv_max; p++) { + if ( ( pv_ptr = vg_ptr->pv[p]) == NULL) continue; + lvm_do_create_proc_entry_of_pv ( vg_ptr, pv_ptr); + } + } +} + +/* + * remove a /proc entry for a volume group + */ +void lvm_do_remove_proc_entry_of_vg ( vg_t *vg_ptr) { + int l, p; + lv_t *lv_ptr; + pv_t *pv_ptr; + + for ( l = 0; l < vg_ptr->lv_max; l++) { + if ( ( lv_ptr = vg_ptr->lv[l]) == NULL) continue; + 
lvm_do_remove_proc_entry_of_lv ( vg_ptr, vg_ptr->lv[l]); + } + for ( p = 0; p < vg_ptr->pv_max; p++) { + if ( ( pv_ptr = vg_ptr->pv[p]) == NULL) continue; + lvm_do_remove_proc_entry_of_pv ( vg_ptr, vg_ptr->pv[p]); + } + if ( vg_ptr->vg_dir_pde != NULL) { + remove_proc_entry(LVM_LV_SUBDIR, vg_ptr->vg_dir_pde); + remove_proc_entry(LVM_PV_SUBDIR, vg_ptr->vg_dir_pde); + remove_proc_entry("group", vg_ptr->vg_dir_pde); + remove_proc_entry(vg_ptr->vg_name, lvm_proc_vg_subdir); + } +} +#endif + + /* * support function initialize gendisk variables */ @@ -2516,8 +3158,9 @@ void __init lvm_blocksizes[i] = BLOCK_SIZE; } - blksize_size[MAJOR_NR] = lvm_blocksizes; blk_size[MAJOR_NR] = lvm_size; + blksize_size[MAJOR_NR] = lvm_blocksizes; + hardsect_size[MAJOR_NR] = lvm_blocksizes; return; } /* lvm_gen_init() */ @@ -2533,17 +3176,8 @@ void __init * * Is this the real thing? * - * No, it's bollocks. md.c tries to do a bit different thing that might - * _somewhat_ work eons ago. Neither does any good these days. mount() couldn't - * care less for icache (it cares only for ->s_root->d_count and if we want - * loopback mounts even that will stop). BTW, with the form used here mount() - * would have to scan the _whole_ icache to detect the attempt - how on the - * Earth could it guess the i_ino of your dummy inode? Official line on the - * exclusion between mount()/swapon()/open()/etc. is Just Don't Do It(tm). - * If you can convince Linus that it's worth changing - fine, then you'll need - * to do blkdev_get()/blkdev_put(). Until then... 
*/ -struct inode *lvm_get_inode(kdev_t dev) +struct inode *lvm_get_inode(int dev) { struct inode *inode_this = NULL; diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c index a06b407346d6..663dfd3958d7 100644 --- a/drivers/md/raid5.c +++ b/drivers/md/raid5.c @@ -30,12 +30,12 @@ static mdk_personality_t raid5_personality; * Stripe cache */ -#define NR_STRIPES 128 +#define NR_STRIPES 256 #define HASH_PAGES 1 #define HASH_PAGES_ORDER 0 #define NR_HASH (HASH_PAGES * PAGE_SIZE / sizeof(struct stripe_head *)) #define HASH_MASK (NR_HASH - 1) -#define stripe_hash(conf, sect, size) ((conf)->stripe_hashtbl[((sect) / (size >> 9)) & HASH_MASK]) +#define stripe_hash(conf, sect) ((conf)->stripe_hashtbl[((sect) / ((conf)->buffer_size >> 9)) & HASH_MASK]) /* * The following can be used to debug the driver @@ -44,10 +44,8 @@ static mdk_personality_t raid5_personality; #define RAID5_PARANOIA 1 #if RAID5_PARANOIA && CONFIG_SMP # define CHECK_DEVLOCK() if (!spin_is_locked(&conf->device_lock)) BUG() -# define CHECK_SHLOCK(sh) if (!stripe_locked(sh)) BUG() #else # define CHECK_DEVLOCK() -# define CHECK_SHLOCK(unused) #endif #if RAID5_DEBUG @@ -60,196 +58,98 @@ static mdk_personality_t raid5_personality; static void print_raid5_conf (raid5_conf_t *conf); -static inline int stripe_locked(struct stripe_head *sh) +static inline void __release_stripe(raid5_conf_t *conf, struct stripe_head *sh) { - return test_bit(STRIPE_LOCKED, &sh->state); -} - -static void __unlock_stripe(struct stripe_head *sh) -{ - if (!md_test_and_clear_bit(STRIPE_LOCKED, &sh->state)) - BUG(); - PRINTK("unlocking stripe %lu\n", sh->sector); - wake_up(&sh->wait); + if (atomic_dec_and_test(&sh->count)) { + if (!list_empty(&sh->lru)) + BUG(); + if (atomic_read(&conf->active_stripes)==0) + BUG(); + if (test_bit(STRIPE_HANDLE, &sh->state)) { + list_add_tail(&sh->lru, &conf->handle_list); + md_wakeup_thread(conf->thread); + } + else { + list_add_tail(&sh->lru, &conf->inactive_list); + atomic_dec(&conf->active_stripes); + 
wake_up(&conf->wait_for_stripe); + } + } } - -static void finish_unlock_stripe(struct stripe_head *sh) +static void release_stripe(struct stripe_head *sh) { raid5_conf_t *conf = sh->raid_conf; - sh->cmd = STRIPE_NONE; - sh->phase = PHASE_COMPLETE; - atomic_dec(&conf->nr_pending_stripes); - atomic_inc(&conf->nr_cached_stripes); - __unlock_stripe(sh); - atomic_dec(&sh->count); - wake_up(&conf->wait_for_stripe); + + spin_lock_irq(&conf->device_lock); + __release_stripe(conf, sh); + spin_unlock_irq(&conf->device_lock); } -static void remove_hash(raid5_conf_t *conf, struct stripe_head *sh) +static void remove_hash(struct stripe_head *sh) { PRINTK("remove_hash(), stripe %lu\n", sh->sector); - CHECK_DEVLOCK(); - CHECK_SHLOCK(sh); if (sh->hash_pprev) { if (sh->hash_next) sh->hash_next->hash_pprev = sh->hash_pprev; *sh->hash_pprev = sh->hash_next; sh->hash_pprev = NULL; - atomic_dec(&conf->nr_hashed_stripes); } } -static void lock_get_bh (struct buffer_head *bh) -{ - while (md_test_and_set_bit(BH_Lock, &bh->b_state)) - __wait_on_buffer(bh); - atomic_inc(&bh->b_count); -} - static __inline__ void insert_hash(raid5_conf_t *conf, struct stripe_head *sh) { - struct stripe_head **shp = &stripe_hash(conf, sh->sector, sh->size); + struct stripe_head **shp = &stripe_hash(conf, sh->sector); - PRINTK("insert_hash(), stripe %lu, nr_hashed_stripes %d\n", - sh->sector, atomic_read(&conf->nr_hashed_stripes)); + PRINTK("insert_hash(), stripe %lu\n",sh->sector); CHECK_DEVLOCK(); - CHECK_SHLOCK(sh); if ((sh->hash_next = *shp) != NULL) (*shp)->hash_pprev = &sh->hash_next; *shp = sh; sh->hash_pprev = shp; - atomic_inc(&conf->nr_hashed_stripes); } -static struct buffer_head *get_free_buffer(struct stripe_head *sh, int b_size) -{ - struct buffer_head *bh; - unsigned long flags; - - CHECK_SHLOCK(sh); - md_spin_lock_irqsave(&sh->stripe_lock, flags); - bh = sh->buffer_pool; - if (!bh) - goto out_unlock; - sh->buffer_pool = bh->b_next; - bh->b_size = b_size; - if (atomic_read(&bh->b_count)) - 
BUG(); -out_unlock: - md_spin_unlock_irqrestore(&sh->stripe_lock, flags); - - return bh; -} - -static struct buffer_head *get_free_bh(struct stripe_head *sh) -{ - struct buffer_head *bh; - unsigned long flags; - - CHECK_SHLOCK(sh); - md_spin_lock_irqsave(&sh->stripe_lock, flags); - bh = sh->bh_pool; - if (!bh) - goto out_unlock; - sh->bh_pool = bh->b_next; - if (atomic_read(&bh->b_count)) - BUG(); -out_unlock: - md_spin_unlock_irqrestore(&sh->stripe_lock, flags); - - return bh; -} - -static void put_free_buffer(struct stripe_head *sh, struct buffer_head *bh) -{ - unsigned long flags; - - if (atomic_read(&bh->b_count)) - BUG(); - CHECK_SHLOCK(sh); - md_spin_lock_irqsave(&sh->stripe_lock, flags); - bh->b_next = sh->buffer_pool; - sh->buffer_pool = bh; - md_spin_unlock_irqrestore(&sh->stripe_lock, flags); -} - -static void put_free_bh(struct stripe_head *sh, struct buffer_head *bh) -{ - unsigned long flags; - - if (atomic_read(&bh->b_count)) - BUG(); - CHECK_SHLOCK(sh); - md_spin_lock_irqsave(&sh->stripe_lock, flags); - bh->b_next = sh->bh_pool; - sh->bh_pool = bh; - md_spin_unlock_irqrestore(&sh->stripe_lock, flags); -} +/* find an idle stripe, make sure it is unhashed, and return it. 
*/ static struct stripe_head *get_free_stripe(raid5_conf_t *conf) { - struct stripe_head *sh; + struct stripe_head *sh = NULL; + struct list_head *first; - md_spin_lock_irq(&conf->device_lock); - sh = conf->free_sh_list; - if (!sh) + CHECK_DEVLOCK(); + if (list_empty(&conf->inactive_list)) goto out; - conf->free_sh_list = sh->free_next; - atomic_dec(&conf->nr_free_sh); - if (!atomic_read(&conf->nr_free_sh) && conf->free_sh_list) - BUG(); - if (sh->hash_pprev || md_atomic_read(&sh->nr_pending) || - atomic_read(&sh->count)) - BUG(); + first = conf->inactive_list.next; + sh = list_entry(first, struct stripe_head, lru); + list_del_init(first); + remove_hash(sh); + atomic_inc(&conf->active_stripes); out: - md_spin_unlock_irq(&conf->device_lock); return sh; } -static void __put_free_stripe (raid5_conf_t *conf, struct stripe_head *sh) -{ - if (atomic_read(&sh->count) != 0) - BUG(); - CHECK_DEVLOCK(); - CHECK_SHLOCK(sh); - clear_bit(STRIPE_LOCKED, &sh->state); - sh->free_next = conf->free_sh_list; - conf->free_sh_list = sh; - atomic_inc(&conf->nr_free_sh); -} - static void shrink_buffers(struct stripe_head *sh, int num) { struct buffer_head *bh; + int i; - while (num--) { - bh = get_free_buffer(sh, -1); + for (i=0; i < num; i++) { + bh = sh->bh_cache[i]; if (!bh) return; + sh->bh_cache[i] = NULL; free_page((unsigned long) bh->b_data); kfree(bh); } } -static void shrink_bh(struct stripe_head *sh, int num) -{ - struct buffer_head *bh; - - while (num--) { - bh = get_free_bh(sh); - if (!bh) - return; - kfree(bh); - } -} - -static int grow_raid5_buffers(struct stripe_head *sh, int num, int b_size, int priority) +static int grow_buffers(struct stripe_head *sh, int num, int b_size, int priority) { struct buffer_head *bh; + int i; - while (num--) { + for (i=0; ib_size = b_size; atomic_set(&bh->b_count, 0); bh->b_page = page; - put_free_buffer(sh, bh); - } - return 0; -} + sh->bh_cache[i] = bh; -static int grow_bh(struct stripe_head *sh, int num, int priority) -{ - struct buffer_head *bh; - - while (num--)
{ - bh = kmalloc(sizeof(struct buffer_head), priority); - if (!bh) - return 1; - memset(bh, 0, sizeof (struct buffer_head)); - init_waitqueue_head(&bh->b_wait); - put_free_bh(sh, bh); } return 0; } -static void raid5_free_buffer(struct stripe_head *sh, struct buffer_head *bh) -{ - put_free_buffer(sh, bh); -} +static struct buffer_head *raid5_build_block (struct stripe_head *sh, int i); -static void raid5_free_bh(struct stripe_head *sh, struct buffer_head *bh) -{ - put_free_bh(sh, bh); -} - -static void raid5_free_old_bh(struct stripe_head *sh, int i) -{ - CHECK_SHLOCK(sh); - if (!sh->bh_old[i]) - BUG(); - raid5_free_buffer(sh, sh->bh_old[i]); - sh->bh_old[i] = NULL; -} - -static void raid5_update_old_bh(struct stripe_head *sh, int i) -{ - CHECK_SHLOCK(sh); - PRINTK("stripe %lu, idx %d, updating cache copy\n", sh->sector, i); - if (!sh->bh_copy[i]) - BUG(); - if (sh->bh_old[i]) - raid5_free_old_bh(sh, i); - sh->bh_old[i] = sh->bh_copy[i]; - sh->bh_copy[i] = NULL; -} - -static void free_stripe(struct stripe_head *sh) +static inline void init_stripe(struct stripe_head *sh, unsigned long sector) { raid5_conf_t *conf = sh->raid_conf; - int disks = conf->raid_disks, j; + int disks = conf->raid_disks, i; if (atomic_read(&sh->count) != 0) BUG(); + if (test_bit(STRIPE_HANDLE, &sh->state)) + BUG(); + CHECK_DEVLOCK(); - CHECK_SHLOCK(sh); - PRINTK("free_stripe called, stripe %lu\n", sh->sector); - if (sh->phase != PHASE_COMPLETE || atomic_read(&sh->count)) { - PRINTK("raid5: free_stripe(), sector %lu, phase %d, count %d\n", sh->sector, sh->phase, atomic_read(&sh->count)); - return; - } - for (j = 0; j < disks; j++) { - if (sh->bh_old[j]) - raid5_free_old_bh(sh, j); - if (sh->bh_new[j] || sh->bh_copy[j]) - BUG(); - } - remove_hash(conf, sh); - __put_free_stripe(conf, sh); -} + PRINTK("init_stripe called, stripe %lu\n", sh->sector); -static int shrink_stripe_cache(raid5_conf_t *conf, int nr) -{ - struct stripe_head *sh; - int i, count = 0; - - PRINTK("shrink_stripe_cache called, 
%d/%d, clock %d\n", nr, atomic_read(&conf->nr_hashed_stripes), conf->clock); - md_spin_lock_irq(&conf->device_lock); - for (i = 0; i < NR_HASH; i++) { - sh = conf->stripe_hashtbl[(i + conf->clock) & HASH_MASK]; - for (; sh; sh = sh->hash_next) { - if (sh->phase != PHASE_COMPLETE) - continue; - if (atomic_read(&sh->count)) - continue; - /* - * Try to lock this stripe: - */ - if (md_test_and_set_bit(STRIPE_LOCKED, &sh->state)) - continue; - free_stripe(sh); - if (++count == nr) { - conf->clock = (i + conf->clock) & HASH_MASK; - goto out; - } + remove_hash(sh); + + sh->sector = sector; + sh->size = conf->buffer_size; + sh->state = 0; + + for (i=disks; i--; ) { + if (sh->bh_read[i] || sh->bh_write[i] || sh->bh_written[i] || + buffer_locked(sh->bh_cache[i])) { + printk("sector=%lx i=%d %p %p %p %d\n", + sh->sector, i, sh->bh_read[i], + sh->bh_write[i], sh->bh_written[i], + buffer_locked(sh->bh_cache[i])); + BUG(); } + clear_bit(BH_Uptodate, &sh->bh_cache[i]->b_state); + raid5_build_block(sh, i); } -out: - md_spin_unlock_irq(&conf->device_lock); - PRINTK("shrink completed, nr_hashed_stripes %d, nr_pending_strips %d\n", - atomic_read(&conf->nr_hashed_stripes), - atomic_read(&conf->nr_pending_stripes)); - return count; + insert_hash(conf, sh); } -void __wait_lock_stripe(struct stripe_head *sh) +/* the buffer size has changed, so unhash all stripes + * as active stripes complete, they will go onto inactive list + */ +static void shrink_stripe_cache(raid5_conf_t *conf) { - MD_DECLARE_WAITQUEUE(wait, current); - - PRINTK("wait_lock_stripe %lu\n", sh->sector); - if (!atomic_read(&sh->count)) + int i; + CHECK_DEVLOCK(); + if (atomic_read(&conf->active_stripes)) BUG(); - add_wait_queue(&sh->wait, &wait); -repeat: - set_current_state(TASK_UNINTERRUPTIBLE); - if (md_test_and_set_bit(STRIPE_LOCKED, &sh->state)) { - schedule(); - goto repeat; + for (i=0; i < NR_HASH; i++) { + struct stripe_head *sh; + while ((sh = conf->stripe_hashtbl[i])) + remove_hash(sh); } - 
PRINTK("wait_lock_stripe %lu done\n", sh->sector); - remove_wait_queue(&sh->wait, &wait); - current->state = TASK_RUNNING; } -static struct stripe_head *__find_stripe(raid5_conf_t *conf, unsigned long sector, int size) +static struct stripe_head *__find_stripe(raid5_conf_t *conf, unsigned long sector) { struct stripe_head *sh; + CHECK_DEVLOCK(); PRINTK("__find_stripe, sector %lu\n", sector); - for (sh = stripe_hash(conf, sector, size); sh; sh = sh->hash_next) { - if (sh->sector == sector && sh->raid_conf == conf) { - if (sh->size != size) - BUG(); + for (sh = stripe_hash(conf, sector); sh; sh = sh->hash_next) + if (sh->sector == sector) return sh; - } - } PRINTK("__stripe %lu not in cache\n", sector); return NULL; } -static inline struct stripe_head *alloc_stripe(raid5_conf_t *conf, unsigned long sector, int size) +static struct stripe_head *get_active_stripe(raid5_conf_t *conf, unsigned long sector, int size, int noblock) { struct stripe_head *sh; - struct buffer_head *buffer_pool, *bh_pool; - MD_DECLARE_WAITQUEUE(wait, current); - - PRINTK("alloc_stripe called\n"); - - - while ((sh = get_free_stripe(conf)) == NULL) { - int cnt; - add_wait_queue(&conf->wait_for_stripe, &wait); - set_current_state(TASK_UNINTERRUPTIBLE); - cnt = shrink_stripe_cache(conf, conf->max_nr_stripes / 8); - sh = get_free_stripe(conf); - if (!sh && cnt < (conf->max_nr_stripes/8)) { - md_wakeup_thread(conf->thread); - PRINTK("waiting for some stripes to complete - %d %d\n", cnt, conf->max_nr_stripes/8); - schedule(); - } - remove_wait_queue(&conf->wait_for_stripe, &wait); - current->state = TASK_RUNNING; - if (sh) - break; - } - buffer_pool = sh->buffer_pool; - bh_pool = sh->bh_pool; - memset(sh, 0, sizeof(*sh)); - sh->stripe_lock = MD_SPIN_LOCK_UNLOCKED; - md_init_waitqueue_head(&sh->wait); - sh->buffer_pool = buffer_pool; - sh->bh_pool = bh_pool; - sh->phase = PHASE_COMPLETE; - sh->cmd = STRIPE_NONE; - sh->raid_conf = conf; - sh->sector = sector; - sh->size = size; - 
atomic_inc(&conf->nr_cached_stripes); - - return sh; -} + PRINTK("get_stripe, sector %lu\n", sector); -static struct stripe_head *get_lock_stripe(raid5_conf_t *conf, unsigned long sector, int size) -{ - struct stripe_head *sh, *new = NULL; + md_spin_lock_irq(&conf->device_lock); - PRINTK("get_stripe, sector %lu\n", sector); + do { + if (conf->buffer_size == 0 || + (size && size != conf->buffer_size)) { + /* either the size is being changed (buffer_size==0) or + * we need to change it. + * If size==0, we can proceed as soon as buffer_size gets set. + * If size>0, we can proceed when active_stripes reaches 0, or + * when someone else sets the buffer_size to size. + * If someone sets the buffer size to something else, we will need to + * assert that we want to change it again + */ + int oldsize = conf->buffer_size; + PRINTK("get_stripe %ld/%d buffer_size is %d, %d active\n", sector, size, conf->buffer_size, atomic_read(&conf->active_stripes)); + if (size==0) + wait_event_lock_irq(conf->wait_for_stripe, + conf->buffer_size, + conf->device_lock); + else { + while (conf->buffer_size != size && atomic_read(&conf->active_stripes)) { + conf->buffer_size = 0; + wait_event_lock_irq(conf->wait_for_stripe, + atomic_read(&conf->active_stripes)==0 || conf->buffer_size, + conf->device_lock); + PRINTK("waited and now %ld/%d buffer_size is %d - %d active\n", sector, size, + conf->buffer_size, atomic_read(&conf->active_stripes)); + } - /* - * Do this in set_blocksize()! 
- */ - if (conf->buffer_size != size) { - PRINTK("switching size, %d --> %d\n", conf->buffer_size, size); - shrink_stripe_cache(conf, conf->max_nr_stripes); - conf->buffer_size = size; - } + if (conf->buffer_size != size) { + printk("raid5: switching cache buffer size, %d --> %d\n", oldsize, size); + shrink_stripe_cache(conf); + if (size==0) BUG(); + conf->buffer_size = size; + PRINTK("size now %d\n", conf->buffer_size); + } + } + } + if (size == 0) + sector -= sector & ((conf->buffer_size>>9)-1); -repeat: - md_spin_lock_irq(&conf->device_lock); - sh = __find_stripe(conf, sector, size); - if (!sh) { - if (!new) { - md_spin_unlock_irq(&conf->device_lock); - new = alloc_stripe(conf, sector, size); - goto repeat; + sh = __find_stripe(conf, sector); + if (!sh) { + sh = get_free_stripe(conf); + if (noblock && sh == NULL) + break; + if (!sh) { + wait_event_lock_irq(conf->wait_for_stripe, + !list_empty(&conf->inactive_list), + conf->device_lock); + } else + init_stripe(sh, sector); + } else { + if (atomic_read(&sh->count)) { + if (!list_empty(&sh->lru)) + BUG(); + } else { + if (!test_bit(STRIPE_HANDLE, &sh->state)) + atomic_inc(&conf->active_stripes); + if (list_empty(&sh->lru)) + BUG(); + list_del_init(&sh->lru); + } } - sh = new; - new = NULL; - if (md_test_and_set_bit(STRIPE_LOCKED, &sh->state)) - BUG(); - insert_hash(conf, sh); - atomic_inc(&sh->count); - md_spin_unlock_irq(&conf->device_lock); - } else { + } while (sh == NULL); + + if (sh) atomic_inc(&sh->count); - if (new) { - if (md_test_and_set_bit(STRIPE_LOCKED, &new->state)) - BUG(); - __put_free_stripe(conf, new); - } - md_spin_unlock_irq(&conf->device_lock); - PRINTK("get_stripe, waiting, sector %lu\n", sector); - if (md_test_and_set_bit(STRIPE_LOCKED, &sh->state)) - __wait_lock_stripe(sh); - } + + md_spin_unlock_irq(&conf->device_lock); return sh; } @@ -508,26 +324,18 @@ static int grow_stripes(raid5_conf_t *conf, int num, int priority) return 1; memset(sh, 0, sizeof(*sh)); sh->raid_conf = conf; - 
sh->stripe_lock = MD_SPIN_LOCK_UNLOCKED; - md_init_waitqueue_head(&sh->wait); + sh->lock = SPIN_LOCK_UNLOCKED; - if (md_test_and_set_bit(STRIPE_LOCKED, &sh->state)) - BUG(); - if (grow_raid5_buffers(sh, 2 * conf->raid_disks, PAGE_SIZE, priority)) { - shrink_buffers(sh, 2 * conf->raid_disks); - kfree(sh); - return 1; - } - if (grow_bh(sh, conf->raid_disks, priority)) { - shrink_buffers(sh, 2 * conf->raid_disks); - shrink_bh(sh, conf->raid_disks); + if (grow_buffers(sh, conf->raid_disks, PAGE_SIZE, priority)) { + shrink_buffers(sh, conf->raid_disks); kfree(sh); return 1; } - md_spin_lock_irq(&conf->device_lock); - __put_free_stripe(conf, sh); - atomic_inc(&conf->nr_stripes); - md_spin_unlock_irq(&conf->device_lock); + /* we just created an active stripe so... */ + atomic_set(&sh->count, 1); + atomic_inc(&conf->active_stripes); + INIT_LIST_HEAD(&sh->lru); + release_stripe(sh); } return 0; } @@ -537,119 +345,124 @@ static void shrink_stripes(raid5_conf_t *conf, int num) struct stripe_head *sh; while (num--) { + spin_lock_irq(&conf->device_lock); sh = get_free_stripe(conf); + spin_unlock_irq(&conf->device_lock); if (!sh) break; - if (md_test_and_set_bit(STRIPE_LOCKED, &sh->state)) + if (atomic_read(&sh->count)) BUG(); - shrink_buffers(sh, conf->raid_disks * 2); - shrink_bh(sh, conf->raid_disks); + shrink_buffers(sh, conf->raid_disks); kfree(sh); - atomic_dec(&conf->nr_stripes); + atomic_dec(&conf->active_stripes); } } -static struct buffer_head *raid5_alloc_buffer(struct stripe_head *sh, int b_size) +static inline void raid5_end_buffer_read(struct buffer_head *blist, struct buffer_head *bh) { - struct buffer_head *bh; - - bh = get_free_buffer(sh, b_size); - if (!bh) - BUG(); - return bh; + while (blist) { + struct buffer_head *new = blist; + blist = new->b_reqnext; + memcpy(new->b_data, bh->b_data, bh->b_size); + new->b_end_io(new, 1); + } } -static struct buffer_head *raid5_alloc_bh(struct stripe_head *sh) +static void raid5_end_read_request (struct buffer_head * bh, 
int uptodate) { - struct buffer_head *bh; + struct stripe_head *sh = bh->b_private; + raid5_conf_t *conf = sh->raid_conf; + int disks = conf->raid_disks, i; + unsigned long flags; + struct buffer_head *buffers = NULL; - bh = get_free_bh(sh); - if (!bh) - BUG(); - return bh; -} + for (i=0 ; i<disks; i++) + if (bh == sh->bh_cache[i]) + break; -static void raid5_end_buffer_io (struct stripe_head *sh, int i, int uptodate) -{ - struct buffer_head *bh = sh->bh_new[i]; - - PRINTK("raid5_end_buffer_io %lu, uptodate: %d.\n", bh->b_blocknr, uptodate); - sh->bh_new[i] = NULL; - raid5_free_bh(sh, sh->bh_req[i]); - sh->bh_req[i] = NULL; - PRINTK("calling %p->end_io: %p.\n", bh, bh->b_end_io); - bh->b_end_io(bh, uptodate); - if (!uptodate) - printk(KERN_ALERT "raid5: %s: unrecoverable I/O error for " - "block %lu\n", - partition_name(mddev_to_kdev(sh->raid_conf->mddev)), - bh->b_blocknr); -} + PRINTK("end_read_request %lu/%d, count: %d, uptodate %d.\n", sh->sector, i, atomic_read(&sh->count), uptodate); + if (i == disks) { + BUG(); + return; + } -static inline void raid5_mark_buffer_uptodate (struct buffer_head *bh, int uptodate) -{ - if (uptodate) + md_spin_lock_irqsave(&conf->device_lock, flags); + if (uptodate) { +#ifdef CONFIG_HIGHMEM + /* cannot map highmem bufferheads from irq, + * so leave it for stripe_handle if there might + * be a problem + */ + if (sh->bh_read[i] && + sh->bh_read[i]->b_reqnext == NULL && + !PageHighMem(sh->bh_read[i]->b_page)) { + /* it's safe */ + buffers = sh->bh_read[i]; + sh->bh_read[i] = NULL; + } +#else + buffers = sh->bh_read[i]; + sh->bh_read[i] = NULL; +#endif set_bit(BH_Uptodate, &bh->b_state); - else + if (buffers) { + spin_unlock_irqrestore(&conf->device_lock, flags); + raid5_end_buffer_read(buffers, bh); + spin_lock_irqsave(&conf->device_lock, flags); + } + } else { + md_error(mddev_to_kdev(conf->mddev), bh->b_dev); clear_bit(BH_Uptodate, &bh->b_state); + } + clear_bit(BH_Lock, &bh->b_state); + set_bit(STRIPE_HANDLE, &sh->state); + __release_stripe(conf, sh); +
md_spin_unlock_irqrestore(&conf->device_lock, flags); } -static void raid5_end_request (struct buffer_head * bh, int uptodate) +static void raid5_end_write_request (struct buffer_head *bh, int uptodate) { struct stripe_head *sh = bh->b_private; raid5_conf_t *conf = sh->raid_conf; int disks = conf->raid_disks, i; unsigned long flags; - PRINTK("end_request %lu, nr_pending %d, uptodate: %d, (caller: %p,%p,%p,%p).\n", sh->sector, atomic_read(&sh->nr_pending), uptodate, __builtin_return_address(0),__builtin_return_address(1),__builtin_return_address(2), __builtin_return_address(3)); - md_spin_lock_irqsave(&sh->stripe_lock, flags); - raid5_mark_buffer_uptodate(bh, uptodate); - if (!uptodate) - md_error(mddev_to_kdev(conf->mddev), bh->b_dev); - if (conf->failed_disks) { - for (i = 0; i < disks; i++) { - if (conf->disks[i].operational) - continue; - if (bh != sh->bh_old[i] && bh != sh->bh_req[i] && bh != sh->bh_copy[i]) - continue; - if (bh->b_dev != conf->disks[i].dev) - continue; - set_bit(STRIPE_ERROR, &sh->state); - } - } - md_spin_unlock_irqrestore(&sh->stripe_lock, flags); + for (i=0 ; i<disks; i++) + if (bh == sh->bh_cache[i]) + break; - if (atomic_dec_and_test(&sh->nr_pending)) { - atomic_inc(&conf->nr_handle); - md_wakeup_thread(conf->thread); + PRINTK("end_write_request %lu/%d, count %d, uptodate: %d.\n", sh->sector, i, atomic_read(&sh->count), uptodate); + if (i == disks) { + BUG(); + return; } + + md_spin_lock_irqsave(&conf->device_lock, flags); + if (!uptodate) + md_error(mddev_to_kdev(conf->mddev), bh->b_dev); + clear_bit(BH_Lock, &bh->b_state); + set_bit(STRIPE_HANDLE, &sh->state); + __release_stripe(conf, sh); + md_spin_unlock_irqrestore(&conf->device_lock, flags); } + + -static void raid5_build_block (struct stripe_head *sh, struct buffer_head *bh, int i) +static struct buffer_head *raid5_build_block (struct stripe_head *sh, int i) { raid5_conf_t *conf = sh->raid_conf; - char *b_data; - struct page *b_page; + struct buffer_head *bh = sh->bh_cache[i]; unsigned long block = sh->sector /
(sh->size >> 9); - b_data = bh->b_data; - b_page = bh->b_page; - memset (bh, 0, sizeof (struct buffer_head)); - init_waitqueue_head(&bh->b_wait); - init_buffer(bh, raid5_end_request, sh); - bh->b_dev = conf->disks[i].dev; - bh->b_blocknr = block; - - bh->b_data = b_data; - bh->b_page = b_page; - - bh->b_rdev = conf->disks[i].dev; - bh->b_rsector = sh->sector; + init_buffer(bh, raid5_end_read_request, sh); + bh->b_dev = conf->disks[i].dev; + bh->b_blocknr = block; bh->b_state = (1 << BH_Req) | (1 << BH_Mapped); bh->b_size = sh->size; bh->b_list = BUF_LOCKED; + return bh; } static int raid5_error (mddev_t *mddev, kdev_t dev) @@ -778,6 +591,7 @@ static unsigned long raid5_compute_sector(unsigned long r_sector, unsigned int r return new_sector; } +#if 0 static unsigned long compute_blocknr(struct stripe_head *sh, int i) { raid5_conf_t *conf = sh->raid_conf; @@ -816,38 +630,42 @@ static unsigned long compute_blocknr(struct stripe_head *sh, int i) } return blocknr; } +#endif + +#define check_xor() do { \ + if (count == MAX_XOR_BLOCKS) { \ + xor_block(count, bh_ptr); \ + count = 1; \ + } \ + } while(0) + static void compute_block(struct stripe_head *sh, int dd_idx) { raid5_conf_t *conf = sh->raid_conf; int i, count, disks = conf->raid_disks; - struct buffer_head *bh_ptr[MAX_XOR_BLOCKS]; + struct buffer_head *bh_ptr[MAX_XOR_BLOCKS], *bh; PRINTK("compute_block, stripe %lu, idx %d\n", sh->sector, dd_idx); - if (sh->bh_old[dd_idx] == NULL) - sh->bh_old[dd_idx] = raid5_alloc_buffer(sh, sh->size); - raid5_build_block(sh, sh->bh_old[dd_idx], dd_idx); - memset(sh->bh_old[dd_idx]->b_data, 0, sh->size); - bh_ptr[0] = sh->bh_old[dd_idx]; + memset(sh->bh_cache[dd_idx]->b_data, 0, sh->size); + bh_ptr[0] = sh->bh_cache[dd_idx]; count = 1; - for (i = 0; i < disks; i++) { + for (i = disks ; i--; ) { if (i == dd_idx) continue; - if (sh->bh_old[i]) { - bh_ptr[count++] = sh->bh_old[i]; - } else { + bh = sh->bh_cache[i]; + if (buffer_uptodate(bh)) + bh_ptr[count++] = bh; + else 
printk("compute_block() %d, stripe %lu, %d not present\n", dd_idx, sh->sector, i); - } - if (count == MAX_XOR_BLOCKS) { - xor_block(count, &bh_ptr[0]); - count = 1; - } + + check_xor(); } if (count != 1) - xor_block(count, &bh_ptr[0]); - raid5_mark_buffer_uptodate(sh->bh_old[dd_idx], 1); + xor_block(count, bh_ptr); + set_bit(BH_Uptodate, &sh->bh_cache[dd_idx]->b_state); } static void compute_parity(struct stripe_head *sh, int method) @@ -855,606 +673,432 @@ static void compute_parity(struct stripe_head *sh, int method) raid5_conf_t *conf = sh->raid_conf; int i, pd_idx = sh->pd_idx, disks = conf->raid_disks, count; struct buffer_head *bh_ptr[MAX_XOR_BLOCKS]; + struct buffer_head *chosen[MD_SB_DISKS]; PRINTK("compute_parity, stripe %lu, method %d\n", sh->sector, method); - for (i = 0; i < disks; i++) { - char *bdata; - if (i == pd_idx || !sh->bh_new[i]) - continue; - if (!sh->bh_copy[i]) - sh->bh_copy[i] = raid5_alloc_buffer(sh, sh->size); - raid5_build_block(sh, sh->bh_copy[i], i); - atomic_set_buffer_dirty(sh->bh_copy[i]); - bdata = bh_kmap(sh->bh_new[i]); - memcpy(sh->bh_copy[i]->b_data, bdata, sh->size); - bh_kunmap(sh->bh_new[i]); - } - if (sh->bh_copy[pd_idx] == NULL) { - sh->bh_copy[pd_idx] = raid5_alloc_buffer(sh, sh->size); - atomic_set_buffer_dirty(sh->bh_copy[pd_idx]); - } - raid5_build_block(sh, sh->bh_copy[pd_idx], sh->pd_idx); + memset(chosen, 0, sizeof(chosen)); - if (method == RECONSTRUCT_WRITE) { - memset(sh->bh_copy[pd_idx]->b_data, 0, sh->size); - bh_ptr[0] = sh->bh_copy[pd_idx]; - count = 1; - for (i = 0; i < disks; i++) { - if (i == sh->pd_idx) + count = 1; + bh_ptr[0] = sh->bh_cache[pd_idx]; + spin_lock_irq(&conf->device_lock); + switch(method) { + case READ_MODIFY_WRITE: + if (!buffer_uptodate(sh->bh_cache[pd_idx])) + BUG(); + for (i=disks ; i-- ;) { + if (i==pd_idx) continue; - if (sh->bh_new[i]) { - bh_ptr[count++] = sh->bh_copy[i]; - } else if (sh->bh_old[i]) { - bh_ptr[count++] = sh->bh_old[i]; + if (sh->bh_write[i] && + 
buffer_uptodate(sh->bh_cache[i])) { + bh_ptr[count++] = sh->bh_cache[i]; + chosen[i] = sh->bh_write[i]; + sh->bh_write[i] = sh->bh_write[i]->b_reqnext; + chosen[i]->b_reqnext = sh->bh_written[i]; + sh->bh_written[i] = chosen[i]; + check_xor(); } - if (count == MAX_XOR_BLOCKS) { - xor_block(count, &bh_ptr[0]); - count = 1; - } - } - if (count != 1) { - xor_block(count, &bh_ptr[0]); } - } else if (method == READ_MODIFY_WRITE) { - memcpy(sh->bh_copy[pd_idx]->b_data, sh->bh_old[pd_idx]->b_data, sh->size); - bh_ptr[0] = sh->bh_copy[pd_idx]; - count = 1; - for (i = 0; i < disks; i++) { - if (i == sh->pd_idx) - continue; - if (sh->bh_new[i] && sh->bh_old[i]) { - bh_ptr[count++] = sh->bh_copy[i]; - bh_ptr[count++] = sh->bh_old[i]; + break; + case RECONSTRUCT_WRITE: + memset(sh->bh_cache[pd_idx]->b_data, 0, sh->size); + for (i= disks; i-- ;) + if (i!=pd_idx && sh->bh_write[i]) { + chosen[i] = sh->bh_write[i]; + sh->bh_write[i] = sh->bh_write[i]->b_reqnext; + chosen[i]->b_reqnext = sh->bh_written[i]; + sh->bh_written[i] = chosen[i]; + check_xor(); } - if (count >= (MAX_XOR_BLOCKS - 1)) { - xor_block(count, &bh_ptr[0]); - count = 1; + break; + case CHECK_PARITY: + break; + } + spin_unlock_irq(&conf->device_lock); + for (i = disks; i--;) + if (chosen[i]) { + struct buffer_head *bh = sh->bh_cache[i]; + char *bdata; + mark_buffer_clean(chosen[i]); /* NO FIXME */ + bdata = bh_kmap(chosen[i]); + memcpy(bh->b_data, + bdata,sh->size); + bh_kunmap(chosen[i]); + set_bit(BH_Lock, &bh->b_state); + mark_buffer_uptodate(bh, 1); + } + + switch(method) { + case RECONSTRUCT_WRITE: + case CHECK_PARITY: + for (i=disks; i--;) + if (i != pd_idx) { + bh_ptr[count++] = sh->bh_cache[i]; + check_xor(); + } + break; + case READ_MODIFY_WRITE: + for (i = disks; i--;) + if (chosen[i]) { + bh_ptr[count++] = sh->bh_cache[i]; + check_xor(); } - } - if (count != 1) { - xor_block(count, &bh_ptr[0]); - } } - raid5_mark_buffer_uptodate(sh->bh_copy[pd_idx], 1); + if (count != 1) + xor_block(count, bh_ptr); + + 
if (method != CHECK_PARITY) { + mark_buffer_uptodate(sh->bh_cache[pd_idx], 1); + set_bit(BH_Lock, &sh->bh_cache[pd_idx]->b_state); + } else + mark_buffer_uptodate(sh->bh_cache[pd_idx], 0); } static void add_stripe_bh (struct stripe_head *sh, struct buffer_head *bh, int dd_idx, int rw) { + struct buffer_head **bhp; raid5_conf_t *conf = sh->raid_conf; - struct buffer_head *bh_req; PRINTK("adding bh b#%lu to stripe s#%lu\n", bh->b_blocknr, sh->sector); - CHECK_SHLOCK(sh); - if (sh->bh_new[dd_idx]) - BUG(); - bh_req = raid5_alloc_bh(sh); - raid5_build_block(sh, bh_req, dd_idx); - bh_req->b_data = bh->b_data; - bh_req->b_page = bh->b_page; - md_spin_lock_irq(&conf->device_lock); - if (sh->phase == PHASE_COMPLETE && sh->cmd == STRIPE_NONE) { - PRINTK("stripe s#%lu => PHASE_BEGIN (%s)\n", sh->sector, rw == READ ? "read" : "write"); - sh->phase = PHASE_BEGIN; - sh->cmd = (rw == READ) ? STRIPE_READ : STRIPE_WRITE; - atomic_inc(&conf->nr_pending_stripes); - atomic_inc(&conf->nr_handle); - PRINTK("# of pending stripes: %u, # of handle: %u\n", atomic_read(&conf->nr_pending_stripes), atomic_read(&conf->nr_handle)); + spin_lock_irq(&conf->device_lock); + bh->b_reqnext = NULL; + if (rw == READ) + bhp = &sh->bh_read[dd_idx]; + else + bhp = &sh->bh_write[dd_idx]; + while (*bhp) { + printk(KERN_NOTICE "raid5: multiple %d requests for sector %ld\n", rw, sh->sector); + bhp = & (*bhp)->b_reqnext; } - sh->bh_new[dd_idx] = bh; - sh->bh_req[dd_idx] = bh_req; - sh->cmd_new[dd_idx] = rw; - sh->new[dd_idx] = 1; - md_spin_unlock_irq(&conf->device_lock); + *bhp = bh; + spin_unlock_irq(&conf->device_lock); PRINTK("added bh b#%lu to stripe s#%lu, disk %d.\n", bh->b_blocknr, sh->sector, dd_idx); } -static void complete_stripe(struct stripe_head *sh) -{ - raid5_conf_t *conf = sh->raid_conf; - int disks = conf->raid_disks; - int i, new = 0; - - PRINTK("complete_stripe %lu\n", sh->sector); - for (i = 0; i < disks; i++) { - if (sh->cmd == STRIPE_SYNC && sh->bh_copy[i]) - raid5_update_old_bh(sh, i); - 
if (sh->cmd == STRIPE_WRITE && i == sh->pd_idx) - raid5_update_old_bh(sh, i); - if (sh->bh_new[i]) { - PRINTK("stripe %lu finishes new bh, sh->new == %d\n", sh->sector, sh->new[i]); - if (!sh->new[i]) { -#if 0 - if (sh->cmd == STRIPE_WRITE) { - char *bdata = bh_kmap(sh->bh_new[i]); - if (memcmp(bdata, sh->bh_copy[i]->b_data, sh->size)) { - printk("copy differs, %s, sector %lu ", - test_bit(BH_Dirty, &sh->bh_new[i]->b_state) ? "dirty" : "clean", - sh->sector); - } else if (test_bit(BH_Dirty, &sh->bh_new[i]->b_state)) - printk("sector %lu dirty\n", sh->sector); - bh_kunmap(sh->bh_new[i]); - } -#endif - if (sh->cmd == STRIPE_WRITE) - raid5_update_old_bh(sh, i); - raid5_end_buffer_io(sh, i, 1); - continue; - } else - new++; - } - if (new && sh->cmd == STRIPE_WRITE) - printk("raid5: bug, completed STRIPE_WRITE with new == %d\n", new); - } - if (sh->cmd == STRIPE_SYNC) - md_done_sync(conf->mddev, (sh->size>>10) - sh->sync_redone,1); - if (!new) - finish_unlock_stripe(sh); - else { - PRINTK("stripe %lu, new == %d\n", sh->sector, new); - sh->phase = PHASE_BEGIN; - } -} - - -static void handle_stripe_write (mddev_t *mddev , raid5_conf_t *conf, - struct stripe_head *sh, int nr_write, int * operational, int disks, - int parity, int parity_failed, int nr_cache, int nr_cache_other, - int nr_failed_other, int nr_cache_overwrite, int nr_failed_overwrite) -{ - int i; - unsigned int block; - struct buffer_head *bh; - int method1 = INT_MAX, method2 = INT_MAX; - -#if 0 - /* - * Attempt to add entries :-) - */ - if (nr_write != disks - 1) { - for (i = 0; i < disks; i++) { - if (i == sh->pd_idx) - continue; - if (sh->bh_new[i]) - continue; - block = (int) compute_blocknr(sh, i); - bh = get_hash_table(mddev_to_kdev(mddev), block, sh->size); - if (!bh) - continue; - if (buffer_dirty(bh) && !md_test_and_set_bit(BH_Lock, &bh->b_state)) { - PRINTK("Whee.. 
sector %lu, index %d (%d) found in the buffer cache!\n", sh->sector, i, block); - add_stripe_bh(sh, bh, i, WRITE); - sh->new[i] = 0; - nr_write++; - if (sh->bh_old[i]) { - nr_cache_overwrite++; - nr_cache_other--; - } else - if (!operational[i]) { - nr_failed_overwrite++; - nr_failed_other--; - } - } - atomic_dec(&bh->b_count); - } - } -#endif - PRINTK("handle_stripe() -- begin writing, stripe %lu\n", sh->sector); - /* - * Writing, need to update parity buffer. - * - * Compute the number of I/O requests in the "reconstruct - * write" and "read modify write" methods. - */ - if (!nr_failed_other) - method1 = (disks - 1) - (nr_write + nr_cache_other); - if (!nr_failed_overwrite && !parity_failed) - method2 = nr_write - nr_cache_overwrite + (1 - parity); - - if (method1 == INT_MAX && method2 == INT_MAX) - BUG(); - PRINTK("handle_stripe(), sector %lu, nr_write %d, method1 %d, method2 %d\n", sh->sector, nr_write, method1, method2); - if (!method1 || !method2) { - sh->phase = PHASE_WRITE; - compute_parity(sh, method1 <= method2 ? 
RECONSTRUCT_WRITE : READ_MODIFY_WRITE); - for (i = 0; i < disks; i++) { - if (!operational[i] && !conf->spare && !conf->resync_parity) - continue; - bh = sh->bh_copy[i]; - if (i != sh->pd_idx && ((bh == NULL) ^ (sh->bh_new[i] == NULL))) - printk("raid5: bug: bh == %p, bh_new[%d] == %p\n", bh, i, sh->bh_new[i]); - if (i == sh->pd_idx && !bh) - printk("raid5: bug: bh == NULL, i == pd_idx == %d\n", i); - if (bh) { - PRINTK("making request for buffer %d\n", i); - lock_get_bh(bh); - if (!operational[i] && !conf->resync_parity) { - PRINTK("writing spare %d\n", i); - atomic_inc(&sh->nr_pending); - bh->b_dev = bh->b_rdev = conf->spare->dev; - generic_make_request(WRITE, bh); - } else { - atomic_inc(&sh->nr_pending); - bh->b_dev = bh->b_rdev = conf->disks[i].dev; - generic_make_request(WRITE, bh); - } - atomic_dec(&bh->b_count); - } - } - PRINTK("handle_stripe() %lu, writing back %d buffers\n", sh->sector, md_atomic_read(&sh->nr_pending)); - return; - } - if (method1 < method2) { - sh->write_method = RECONSTRUCT_WRITE; - for (i = 0; i < disks; i++) { - if (i == sh->pd_idx) - continue; - if (sh->bh_new[i] || sh->bh_old[i]) - continue; - sh->bh_old[i] = raid5_alloc_buffer(sh, sh->size); - raid5_build_block(sh, sh->bh_old[i], i); - } - } else { - sh->write_method = READ_MODIFY_WRITE; - for (i = 0; i < disks; i++) { - if (sh->bh_old[i]) - continue; - if (!sh->bh_new[i] && i != sh->pd_idx) - continue; - sh->bh_old[i] = raid5_alloc_buffer(sh, sh->size); - raid5_build_block(sh, sh->bh_old[i], i); - } - } - sh->phase = PHASE_READ_OLD; - for (i = 0; i < disks; i++) { - if (!sh->bh_old[i]) - continue; - if (test_bit(BH_Uptodate, &sh->bh_old[i]->b_state)) - continue; - lock_get_bh(sh->bh_old[i]); - atomic_inc(&sh->nr_pending); - sh->bh_old[i]->b_dev = sh->bh_old[i]->b_rdev = conf->disks[i].dev; - generic_make_request(READ, sh->bh_old[i]); - atomic_dec(&sh->bh_old[i]->b_count); - } - PRINTK("handle_stripe() %lu, reading %d old buffers\n", sh->sector, md_atomic_read(&sh->nr_pending)); 
-} /* - * Reading + * handle_stripe - do things to a stripe. + * + * We lock the stripe and then examine the state of various bits + * to see what needs to be done. + * Possible results: + * return some read request which now have data + * return some write requests which are safely on disc + * schedule a read on some buffers + * schedule a write of some buffers + * return confirmation of parity correctness + * + * Parity calculations are done inside the stripe lock + * buffers are taken off read_list or write_list, and bh_cache buffers + * get BH_Lock set before the stripe lock is released. + * */ -static void handle_stripe_read (mddev_t *mddev , raid5_conf_t *conf, - struct stripe_head *sh, int nr_read, int * operational, int disks, - int parity, int parity_failed, int nr_cache, int nr_cache_other, - int nr_failed_other, int nr_cache_overwrite, int nr_failed_overwrite) + +static void handle_stripe(struct stripe_head *sh) { + raid5_conf_t *conf = sh->raid_conf; + int disks = conf->raid_disks; + struct buffer_head *return_ok= NULL, *return_fail = NULL; + int action[MD_SB_DISKS]; int i; - int method1 = INT_MAX; - - method1 = nr_read - nr_cache_overwrite; - - PRINTK("handle_stripe(), sector %lu, nr_read %d, nr_cache %d, method1 %d\n", sh->sector, nr_read, nr_cache, method1); + int syncing; + int locked=0, uptodate=0, to_read=0, to_write=0, failed=0, written=0; + int failed_num=0; + struct buffer_head *bh; - if (!method1 || (method1 == 1 && nr_cache == disks - 1)) { - PRINTK("read %lu completed from cache\n", sh->sector); - for (i = 0; i < disks; i++) { - char *bdata; - if (!sh->bh_new[i]) - continue; - if (!sh->bh_old[i]) - compute_block(sh, i); - bdata = bh_kmap(sh->bh_new[i]); - memcpy(bdata, sh->bh_old[i]->b_data, sh->size); - bh_kunmap(sh->bh_new[i]); - } - complete_stripe(sh); - return; - } - if (nr_failed_overwrite) { - sh->phase = PHASE_READ_OLD; - for (i = 0; i < disks; i++) { - if (sh->bh_old[i]) - continue; - if (!operational[i]) - continue; - sh->bh_old[i] 
= raid5_alloc_buffer(sh, sh->size); - raid5_build_block(sh, sh->bh_old[i], i); - lock_get_bh(sh->bh_old[i]); - atomic_inc(&sh->nr_pending); - sh->bh_old[i]->b_dev = sh->bh_old[i]->b_rdev = conf->disks[i].dev; - generic_make_request(READ, sh->bh_old[i]); - atomic_dec(&sh->bh_old[i]->b_count); - } - PRINTK("handle_stripe() %lu, phase READ_OLD, pending %d buffers\n", sh->sector, md_atomic_read(&sh->nr_pending)); - return; - } - sh->phase = PHASE_READ; - for (i = 0; i < disks; i++) { - if (!sh->bh_new[i]) - continue; - if (sh->bh_old[i]) { - char *bdata = bh_kmap(sh->bh_new[i]); - memcpy(bdata, sh->bh_old[i]->b_data, sh->size); - bh_kunmap(sh->bh_new[i]); - continue; - } -#if RAID5_PARANOIA - if (sh->bh_req[i] == NULL || test_bit(BH_Lock, &sh->bh_req[i]->b_state)) { - int j; - printk("req %d is NULL! or locked \n", i); - for (j=0; jbh_new[j], sh->bh_old[j], sh->bh_req[j], - sh->new[j], sh->cmd_new[j]); + PRINTK("handling stripe %ld, cnt=%d, pd_idx=%d\n", sh->sector, atomic_read(&sh->count), sh->pd_idx); + memset(action, 0, sizeof(action)); + + spin_lock(&sh->lock); + clear_bit(STRIPE_HANDLE, &sh->state); + + syncing = test_bit(STRIPE_SYNCING, &sh->state); + /* Now to look around and see what can be done */ + + for (i=disks; i--; ) { + bh = sh->bh_cache[i]; + PRINTK("check %d: state %lx read %p write %p written %p\n", i, bh->b_state, sh->bh_read[i], sh->bh_write[i], sh->bh_written[i]); + /* maybe we can reply to a read */ + if (buffer_uptodate(bh) && sh->bh_read[i]) { + struct buffer_head *rbh, *rbh2; + PRINTK("Return read for disc %d\n", i); + spin_lock_irq(&conf->device_lock); + rbh = sh->bh_read[i]; + sh->bh_read[i] = NULL; + spin_unlock_irq(&conf->device_lock); + while (rbh) { + char *bdata; + bdata = bh_kmap(rbh); + memcpy(bdata, bh->b_data, bh->b_size); + bh_kunmap(rbh); + rbh2 = rbh->b_reqnext; + rbh->b_reqnext = return_ok; + return_ok = rbh; + rbh = rbh2; } - } -#endif - lock_get_bh(sh->bh_req[i]); - atomic_inc(&sh->nr_pending); - sh->bh_req[i]->b_dev = 
sh->bh_req[i]->b_rdev = conf->disks[i].dev; - generic_make_request(READ, sh->bh_req[i]); - atomic_dec(&sh->bh_req[i]->b_count); - } - PRINTK("handle_stripe() %lu, phase READ, pending %d\n", sh->sector, md_atomic_read(&sh->nr_pending)); -} -/* - * Syncing - */ -static void handle_stripe_sync (mddev_t *mddev , raid5_conf_t *conf, - struct stripe_head *sh, int * operational, int disks, - int parity, int parity_failed, int nr_cache, int nr_cache_other, - int nr_failed_other, int nr_cache_overwrite, int nr_failed_overwrite) -{ - struct buffer_head *bh; - int i, pd_idx; - - /* firstly, we want to have data from all non-failed drives - * in bh_old - */ - PRINTK("handle_stripe_sync: sec=%lu disks=%d nr_cache=%d\n", sh->sector, disks, nr_cache); - if ((nr_cache < disks-1) || ((nr_cache == disks-1) && !(parity_failed+nr_failed_other+nr_failed_overwrite)) - ) { - sh->phase = PHASE_READ_OLD; - for (i = 0; i < disks; i++) { - if (sh->bh_old[i]) - continue; - if (!conf->disks[i].operational) - continue; + /* now count some things */ + if (buffer_locked(bh)) locked++; + if (buffer_uptodate(bh)) uptodate++; - bh = raid5_alloc_buffer(sh, sh->size); - sh->bh_old[i] = bh; - raid5_build_block(sh, bh, i); - lock_get_bh(bh); - atomic_inc(&sh->nr_pending); - bh->b_dev = bh->b_rdev = conf->disks[i].dev; - generic_make_request(READ, bh); - md_sync_acct(bh->b_rdev, bh->b_size/512); - atomic_dec(&sh->bh_old[i]->b_count); + + if (sh->bh_read[i]) to_read++; + if (sh->bh_write[i]) to_write++; + if (sh->bh_written[i]) written++; + if (!conf->disks[i].operational) { + failed++; + failed_num = i; } - PRINTK("handle_stripe_sync() %lu, phase READ_OLD, pending %d buffers\n", sh->sector, md_atomic_read(&sh->nr_pending)); - - return; } - /* now, if there is a failed drive, rebuild and write to spare */ - if (nr_cache == disks-1) { - sh->phase = PHASE_WRITE; - /* we can generate the missing block, which will be on the failed drive */ - for (i=0; ispare) { - bh = sh->bh_copy[i]; - if (bh) { - 
memcpy(bh->b_data, sh->bh_old[i]->b_data, sh->size); - set_bit(BH_Uptodate, &bh->b_state); - } else { - bh = sh->bh_old[i]; - sh->bh_old[i] = NULL; - sh->bh_copy[i] = bh; + PRINTK("locked=%d uptodate=%d to_read=%d to_write=%d failed=%d failed_num=%d\n", + locked, uptodate, to_read, to_write, failed, failed_num); + /* check if the array has lost two devices and, if so, some requests might + * need to be failed + */ + if (failed > 1 && to_read+to_write) { + spin_lock_irq(&conf->device_lock); + for (i=disks; i--; ) { + /* fail all writes first */ + if (sh->bh_write[i]) to_write--; + while ((bh = sh->bh_write[i])) { + sh->bh_write[i] = bh->b_reqnext; + bh->b_reqnext = return_fail; + return_fail = bh; + } + /* fail any reads if this device is non-operational */ + if (!conf->disks[i].operational) { + if (sh->bh_read[i]) to_read--; + while ((bh = sh->bh_read[i])) { + sh->bh_read[i] = bh->b_reqnext; + bh->b_reqnext = return_fail; + return_fail = bh; } - atomic_inc(&sh->nr_pending); - lock_get_bh(bh); - bh->b_dev = bh->b_rdev = conf->spare->dev; - generic_make_request(WRITE, bh); - md_sync_acct(bh->b_rdev, bh->b_size/512); - atomic_dec(&bh->b_count); - PRINTK("handle_stripe_sync() %lu, phase WRITE, pending %d buffers\n", sh->sector, md_atomic_read(&sh->nr_pending)); } - break; } - return; + spin_unlock_irq(&conf->device_lock); + if (syncing) { + md_done_sync(conf->mddev, (sh->size>>10) - sh->sync_redone,0); + clear_bit(STRIPE_SYNCING, &sh->state); + syncing = 0; + } } - /* nr_cache == disks: - * check parity and compute/write if needed + /* might be able to return some write requests if the parity block + * is safe, or on a failed drive */ - - compute_parity(sh, RECONSTRUCT_WRITE); - pd_idx = sh->pd_idx; - if (!memcmp(sh->bh_copy[pd_idx]->b_data, sh->bh_old[pd_idx]->b_data, sh->size)) { - /* the parity is correct - Yay! 
*/ - complete_stripe(sh); - } else { - sh->phase = PHASE_WRITE; - bh = sh->bh_copy[pd_idx]; - atomic_set_buffer_dirty(bh); - lock_get_bh(bh); - atomic_inc(&sh->nr_pending); - bh->b_dev = bh->b_rdev = conf->disks[pd_idx].dev; - generic_make_request(WRITE, bh); - md_sync_acct(bh->b_rdev, bh->b_size/512); - atomic_dec(&bh->b_count); - PRINTK("handle_stripe_sync() %lu phase WRITE, pending %d buffers\n", - sh->sector, md_atomic_read(&sh->nr_pending)); - } -} - -/* - * handle_stripe() is our main logic routine. Note that: - * - * 1. lock_stripe() should be used whenever we can't accept additonal - * buffers, either during short sleeping in handle_stripe() or - * during io operations. - * - * 2. We should be careful to set sh->nr_pending whenever we sleep, - * to prevent re-entry of handle_stripe() for the same sh. - * - * 3. conf->failed_disks and disk->operational can be changed - * from an interrupt. This complicates things a bit, but it allows - * us to stop issuing requests for a failed drive as soon as possible. 
- */ -static void handle_stripe(struct stripe_head *sh) -{ - raid5_conf_t *conf = sh->raid_conf; - mddev_t *mddev = conf->mddev; - int disks = conf->raid_disks; - int i, nr_read = 0, nr_write = 0, parity = 0; - int nr_cache = 0, nr_cache_other = 0, nr_cache_overwrite = 0; - int nr_failed_other = 0, nr_failed_overwrite = 0, parity_failed = 0; - int operational[MD_SB_DISKS], failed_disks = conf->failed_disks; - - PRINTK("handle_stripe(), stripe %lu\n", sh->sector); - if (!stripe_locked(sh)) - BUG(); - if (md_atomic_read(&sh->nr_pending)) - BUG(); - if (sh->phase == PHASE_COMPLETE) - BUG(); - - atomic_dec(&conf->nr_handle); - - if (md_test_and_clear_bit(STRIPE_ERROR, &sh->state)) { - printk("raid5: restarting stripe %lu\n", sh->sector); - sh->phase = PHASE_BEGIN; - } - - if ((sh->cmd == STRIPE_WRITE && sh->phase == PHASE_WRITE) || - (sh->cmd == STRIPE_READ && sh->phase == PHASE_READ) || - (sh->cmd == STRIPE_SYNC && sh->phase == PHASE_WRITE) - ) { - /* - * Completed - */ - complete_stripe(sh); - if (sh->phase == PHASE_COMPLETE) - return; - } - - md_spin_lock_irq(&conf->device_lock); - for (i = 0; i < disks; i++) { - operational[i] = conf->disks[i].operational; - if (i == sh->pd_idx && conf->resync_parity) - operational[i] = 0; - } - failed_disks = conf->failed_disks; - md_spin_unlock_irq(&conf->device_lock); - - /* - * Make this one more graceful? 
- */ - if (failed_disks > 1) { - for (i = 0; i < disks; i++) { - if (sh->bh_new[i]) { - raid5_end_buffer_io(sh, i, 0); - continue; + bh = sh->bh_cache[sh->pd_idx]; + if ( written && + ( (conf->disks[sh->pd_idx].operational && !buffer_locked(bh) && buffer_uptodate(bh)) + || (failed == 1 && failed_num == sh->pd_idx)) + ) { + /* any written block on a uptodate or failed drive can be returned */ + for (i=disks; i--; ) + if (sh->bh_written[i]) { + bh = sh->bh_cache[i]; + if (!conf->disks[sh->pd_idx].operational || + (!buffer_locked(bh) && buffer_uptodate(bh)) ) { + /* maybe we can return some write requests */ + struct buffer_head *wbh, *wbh2; + PRINTK("Return write for disc %d\n", i); + spin_lock_irq(&conf->device_lock); + wbh = sh->bh_written[i]; + sh->bh_written[i] = NULL; + spin_unlock_irq(&conf->device_lock); + while (wbh) { + wbh2 = wbh->b_reqnext; + wbh->b_reqnext = return_ok; + return_ok = wbh; + wbh = wbh2; } + } } - if (sh->cmd == STRIPE_SYNC) - md_done_sync(conf->mddev, (sh->size>>10) - sh->sync_redone,1); - finish_unlock_stripe(sh); - return; } - - PRINTK("=== stripe index START ===\n"); - for (i = 0; i < disks; i++) { - PRINTK("disk %d, ", i); - if (sh->bh_old[i]) { - nr_cache++; - PRINTK(" (old cached, %d)", nr_cache); - } - if (i == sh->pd_idx) { - PRINTK(" PARITY."); - if (sh->bh_old[i]) { - PRINTK(" CACHED."); - parity = 1; - } else { - PRINTK(" UNCACHED."); - if (!operational[i]) { - PRINTK(" FAILED."); - parity_failed = 1; + + /* Now we might consider reading some blocks, either to check/generate + * parity, or to satisfy requests + */ + if (to_read || (syncing && (uptodate+failed < disks))) { + for (i=disks; i--;) { + bh = sh->bh_cache[i]; + if (!buffer_locked(bh) && !buffer_uptodate(bh) && + (sh->bh_read[i] || syncing || (failed && sh->bh_read[failed_num]))) { + /* we would like to get this block, possibly + * by computing it, but we might not be able to + */ + if (uptodate == disks-1) { + PRINTK("Computing block %d\n", i); + compute_block(sh, i); + 
uptodate++; + } else if (conf->disks[i].operational) { + set_bit(BH_Lock, &bh->b_state); + action[i] = READ+1; + locked++; + PRINTK("Reading block %d (sync=%d)\n", i, syncing); + if (syncing) + md_sync_acct(conf->disks[i].dev, bh->b_size>>9); } } - PRINTK("\n"); - continue; } - if (!sh->bh_new[i]) { - PRINTK(" (no new data block) "); - if (sh->bh_old[i]) { - PRINTK(" (but old block cached) "); - nr_cache_other++; - } else { - if (!operational[i]) { - PRINTK(" (because failed disk) "); - nr_failed_other++; - } else - PRINTK(" (no old block either) "); + set_bit(STRIPE_HANDLE, &sh->state); + } + + /* now to consider writing and what else, if anything should be read */ + if (to_write) { + int rmw=0, rcw=0; + for (i=disks ; i--;) { + /* would I have to read this buffer for read_modify_write */ + bh = sh->bh_cache[i]; + if ((sh->bh_write[i] || i == sh->pd_idx) && + !buffer_locked(bh) && !buffer_uptodate(bh)) { + if (conf->disks[i].operational +/* && !(conf->resync_parity && i == sh->pd_idx) */ + ) + rmw++; + else rmw += 2*disks; /* cannot read it */ + } + /* Would I have to read this buffer for reconstruct_write */ + if (!sh->bh_write[i] && i != sh->pd_idx && + !buffer_locked(bh) && !buffer_uptodate(bh)) { + if (conf->disks[i].operational) rcw++; + else rcw += 2*disks; } - PRINTK("\n"); - continue; - } - sh->new[i] = 0; - if (sh->cmd_new[i] == READ) { - nr_read++; - PRINTK(" (new READ %d)", nr_read); - } - if (sh->cmd_new[i] == WRITE) { - nr_write++; - PRINTK(" (new WRITE %d)", nr_write); } - if (sh->bh_old[i]) { - nr_cache_overwrite++; - PRINTK(" (overwriting old %d)", nr_cache_overwrite); - } else { - if (!operational[i]) { - nr_failed_overwrite++; - PRINTK(" (overwriting failed %d)", nr_failed_overwrite); + PRINTK("for sector %ld, rmw=%d rcw=%d\n", sh->sector, rmw, rcw); + set_bit(STRIPE_HANDLE, &sh->state); + if (rmw < rcw && rmw > 0) + /* prefer read-modify-write, but need to get some data */ + for (i=disks; i--;) { + bh = sh->bh_cache[i]; + if ((sh->bh_write[i] || 
i == sh->pd_idx) && + !buffer_locked(bh) && !buffer_uptodate(bh) && + conf->disks[i].operational) { + PRINTK("Read_old block %d for r-m-w\n", i); + set_bit(BH_Lock, &bh->b_state); + action[i] = READ+1; + locked++; + } + } + if (rcw <= rmw && rcw > 0) + /* want reconstruct write, but need to get some data */ + for (i=disks; i--;) { + bh = sh->bh_cache[i]; + if (!sh->bh_write[i] && i != sh->pd_idx && + !buffer_locked(bh) && !buffer_uptodate(bh) && + conf->disks[i].operational) { + PRINTK("Read_old block %d for Reconstruct\n", i); + set_bit(BH_Lock, &bh->b_state); + action[i] = READ+1; + locked++; + } } + /* now if nothing is locked, and if we have enough data, we can start a write request */ + if (locked == 0 && (rcw == 0 ||rmw == 0)) { + PRINTK("Computing parity...\n"); + compute_parity(sh, rcw==0 ? RECONSTRUCT_WRITE : READ_MODIFY_WRITE); + /* now every locked buffer is ready to be written */ + for (i=disks; i--;) + if (buffer_locked(sh->bh_cache[i])) { + PRINTK("Writing block %d\n", i); + locked++; + action[i] = WRITE+1; + if (!conf->disks[i].operational + || (i==sh->pd_idx && failed == 0)) + set_bit(STRIPE_INSYNC, &sh->state); + } } - PRINTK("\n"); } - PRINTK("=== stripe index END ===\n"); - if (nr_write && nr_read) - BUG(); + /* maybe we need to check and possibly fix the parity for this stripe + * Any reads will already have been scheduled, so we just see if enough data + * is available + */ + if (syncing && locked == 0 && + !test_bit(STRIPE_INSYNC, &sh->state) && failed <= 1) { + set_bit(STRIPE_HANDLE, &sh->state); + if (failed == 0) { + if (uptodate != disks) + BUG(); + compute_parity(sh, CHECK_PARITY); + uptodate--; + bh = sh->bh_cache[sh->pd_idx]; + if ((*(u32*)bh->b_data) == 0 && + !memcmp(bh->b_data, bh->b_data+4, bh->b_size-4)) { + /* parity is correct (on disc, not in buffer any more) */ + set_bit(STRIPE_INSYNC, &sh->state); + } + } + if (!test_bit(STRIPE_INSYNC, &sh->state)) { + if (failed==0) + failed_num = sh->pd_idx; + /* should be able to compute 
the missing block and write it to spare */ + if (!buffer_uptodate(sh->bh_cache[failed_num])) { + if (uptodate+1 != disks) + BUG(); + compute_block(sh, failed_num); + uptodate++; + } + if (uptodate != disks) + BUG(); + bh = sh->bh_cache[failed_num]; + set_bit(BH_Lock, &bh->b_state); + action[failed_num] = WRITE+1; + locked++; + set_bit(STRIPE_INSYNC, &sh->state); + if (conf->disks[i].operational) + md_sync_acct(conf->disks[i].dev, bh->b_size>>9); + else if (conf->spare) + md_sync_acct(conf->spare->dev, bh->b_size>>9); - if (nr_write) - handle_stripe_write( - mddev, conf, sh, nr_write, operational, disks, - parity, parity_failed, nr_cache, nr_cache_other, - nr_failed_other, nr_cache_overwrite, - nr_failed_overwrite - ); - else if (nr_read) - handle_stripe_read( - mddev, conf, sh, nr_read, operational, disks, - parity, parity_failed, nr_cache, nr_cache_other, - nr_failed_other, nr_cache_overwrite, - nr_failed_overwrite - ); - else if (sh->cmd == STRIPE_SYNC) - handle_stripe_sync( - mddev, conf, sh, operational, disks, - parity, parity_failed, nr_cache, nr_cache_other, - nr_failed_other, nr_cache_overwrite, nr_failed_overwrite - ); + } + } + if (syncing && locked == 0 && test_bit(STRIPE_INSYNC, &sh->state)) { + md_done_sync(conf->mddev, (sh->size>>10) - sh->sync_redone,1); + clear_bit(STRIPE_SYNCING, &sh->state); + } + + + spin_unlock(&sh->lock); + + while ((bh=return_ok)) { + return_ok = bh->b_reqnext; + bh->b_reqnext = NULL; + bh->b_end_io(bh, 1); + } + while ((bh=return_fail)) { + return_ok = bh->b_reqnext; + bh->b_reqnext = NULL; + bh->b_end_io(bh, 0); + } + for (i=disks; i-- ;) + if (action[i]) { + struct buffer_head *bh = sh->bh_cache[i]; + int skip = 0; + if (action[i] == READ+1) + bh->b_end_io = raid5_end_read_request; + else + bh->b_end_io = raid5_end_write_request; + if (conf->disks[i].operational) + bh->b_dev = conf->disks[i].dev; + else if (conf->spare && action[i] == WRITE+1) + bh->b_dev = conf->spare->dev; + else if (action[i] == READ+1) + BUG(); + else 
skip=1; + if (!skip) { + PRINTK("for %ld schedule op %d on disc %d\n", sh->sector, action[i]-1, i); + atomic_inc(&sh->count); + bh->b_rdev = bh->b_dev; + bh->b_rsector = bh->b_blocknr * (bh->b_size>>9); + generic_make_request(action[i]-1, bh); + } else + PRINTK("skip op %d on disc %d for sector %ld\n", action[i]-1, i, sh->sector); + } } @@ -1465,34 +1109,28 @@ static int raid5_make_request (mddev_t *mddev, int rw, struct buffer_head * bh) const unsigned int data_disks = raid_disks - 1; unsigned int dd_idx, pd_idx; unsigned long new_sector; + int read_ahead = 0; struct stripe_head *sh; - if (rw == READA) + if (rw == READA) { rw = READ; + read_ahead=1; + } new_sector = raid5_compute_sector(bh->b_rsector, raid_disks, data_disks, &dd_idx, &pd_idx, conf); PRINTK("raid5_make_request, sector %lu\n", new_sector); - sh = get_lock_stripe(conf, new_sector, bh->b_size); -#if 0 - if ((rw == READ && sh->cmd == STRIPE_WRITE) || (rw == WRITE && sh->cmd == STRIPE_READ)) { - PRINTK("raid5: lock contention, rw == %d, sh->cmd == %d\n", rw, sh->cmd); - lock_stripe(sh); - if (!md_atomic_read(&sh->nr_pending)) - handle_stripe(sh); - goto repeat; - } -#endif - sh->pd_idx = pd_idx; - if (sh->phase != PHASE_COMPLETE && sh->phase != PHASE_BEGIN) - PRINTK("stripe %lu catching the bus!\n", sh->sector); - if (sh->bh_new[dd_idx]) - BUG(); - add_stripe_bh(sh, bh, dd_idx, rw); + sh = get_active_stripe(conf, new_sector, bh->b_size, read_ahead); + if (sh) { + sh->pd_idx = pd_idx; - md_wakeup_thread(conf->thread); + add_stripe_bh(sh, bh, dd_idx, rw); + handle_stripe(sh); + release_stripe(sh); + } else + bh->b_end_io(bh, test_bit(BH_Uptodate, &bh->b_state)); return 0; } @@ -1527,22 +1165,21 @@ static int raid5_sync_request (mddev_t *mddev, unsigned long block_nr) int redone = 0; int bufsize; - if (!conf->buffer_size) - conf->buffer_size = /* device_bsize(mddev_to_kdev(mddev))*/ PAGE_SIZE; - bufsize = conf->buffer_size; - /* Hmm... race on buffer_size ?? 
*/ - redone = block_nr% (bufsize>>10); - block_nr -= redone; - sh = get_lock_stripe(conf, block_nr<<1, bufsize); + sh = get_active_stripe(conf, block_nr<<1, 0, 0); + bufsize = sh->size; + redone = block_nr-(sh->sector>>1); first_sector = raid5_compute_sector(stripe*data_disks*sectors_per_chunk + chunk_offset, raid_disks, data_disks, &dd_idx, &pd_idx, conf); sh->pd_idx = pd_idx; - sh->cmd = STRIPE_SYNC; - sh->phase = PHASE_BEGIN; + spin_lock(&sh->lock); + set_bit(STRIPE_SYNCING, &sh->state); + clear_bit(STRIPE_INSYNC, &sh->state); sh->sync_redone = redone; - atomic_inc(&conf->nr_pending_stripes); - atomic_inc(&conf->nr_handle); - md_wakeup_thread(conf->thread); + spin_unlock(&sh->lock); + + handle_stripe(sh); + release_stripe(sh); + return (bufsize>>10)-redone; } @@ -1558,46 +1195,35 @@ static void raid5d (void *data) struct stripe_head *sh; raid5_conf_t *conf = data; mddev_t *mddev = conf->mddev; - int i, handled; + int handled; PRINTK("+++ raid5d active\n"); handled = 0; - md_spin_lock_irq(&conf->device_lock); - clear_bit(THREAD_WAKEUP, &conf->thread->flags); -repeat_pass: + if (mddev->sb_dirty) { - md_spin_unlock_irq(&conf->device_lock); mddev->sb_dirty = 0; md_update_sb(mddev); - md_spin_lock_irq(&conf->device_lock); } - for (i = 0; i < NR_HASH; i++) { -repeat: - sh = conf->stripe_hashtbl[i]; - for (; sh; sh = sh->hash_next) { - if (sh->raid_conf != conf) - continue; - if (sh->phase == PHASE_COMPLETE) - continue; - if (md_atomic_read(&sh->nr_pending)) - continue; - md_spin_unlock_irq(&conf->device_lock); - if (!atomic_read(&sh->count)) - BUG(); + md_spin_lock_irq(&conf->device_lock); + while (!list_empty(&conf->handle_list)) { + struct list_head *first = conf->handle_list.next; + sh = list_entry(first, struct stripe_head, lru); - handled++; - handle_stripe(sh); - md_spin_lock_irq(&conf->device_lock); - goto repeat; - } - } - if (conf) { - PRINTK("%d stripes handled, nr_handle %d\n", handled, md_atomic_read(&conf->nr_handle)); - if 
(test_and_clear_bit(THREAD_WAKEUP, &conf->thread->flags) && - md_atomic_read(&conf->nr_handle)) - goto repeat_pass; + list_del_init(first); + atomic_inc(&sh->count); + if (atomic_read(&sh->count)!= 1) + BUG(); + md_spin_unlock_irq(&conf->device_lock); + + handled++; + handle_stripe(sh); + release_stripe(sh); + + md_spin_lock_irq(&conf->device_lock); } + PRINTK("%d stripes handled\n", handled); + md_spin_unlock_irq(&conf->device_lock); PRINTK("--- raid5d inactive\n"); @@ -1729,6 +1355,11 @@ static int raid5_run (mddev_t *mddev) conf->device_lock = MD_SPIN_LOCK_UNLOCKED; md_init_waitqueue_head(&conf->wait_for_stripe); + INIT_LIST_HEAD(&conf->handle_list); + INIT_LIST_HEAD(&conf->inactive_list); + atomic_set(&conf->active_stripes, 0); + conf->buffer_size = PAGE_SIZE; /* good default for rebuild */ + PRINTK("raid5_run(md%d) called.\n", mdidx(mddev)); ITERATE_RDEV(mddev,rdev,tmp) { @@ -1869,8 +1500,7 @@ static int raid5_run (mddev_t *mddev) } memory = conf->max_nr_stripes * (sizeof(struct stripe_head) + - conf->raid_disks * (sizeof(struct buffer_head) + - 2 * (sizeof(struct buffer_head) + PAGE_SIZE))) / 1024; + conf->raid_disks * ((sizeof(struct buffer_head) + PAGE_SIZE))) / 1024; if (grow_stripes(conf, conf->max_nr_stripes, GFP_KERNEL)) { printk(KERN_ERR "raid5: couldn't allocate %dkB for buffers\n", memory); shrink_stripes(conf, conf->max_nr_stripes); @@ -1973,11 +1603,10 @@ static int raid5_stop (mddev_t *mddev) { raid5_conf_t *conf = (raid5_conf_t *) mddev->private; - shrink_stripe_cache(conf, conf->max_nr_stripes); - shrink_stripes(conf, conf->max_nr_stripes); - md_unregister_thread(conf->thread); if (conf->resync_thread) md_unregister_thread(conf->resync_thread); + md_unregister_thread(conf->thread); + shrink_stripes(conf, conf->max_nr_stripes); free_pages((unsigned long) conf->stripe_hashtbl, HASH_PAGES_ORDER); kfree(conf); mddev->private = NULL; @@ -1990,23 +1619,14 @@ static void print_sh (struct stripe_head *sh) { int i; - printk("sh %lu, phase %d, size %d, 
pd_idx %d, state %ld, cmd %d.\n", sh->sector, sh->phase, sh->size, sh->pd_idx, sh->state, sh->cmd); - printk("sh %lu, write_method %d, nr_pending %d, count %d.\n", sh->sector, sh->write_method, atomic_read(&sh->nr_pending), atomic_read(&sh->count)); + printk("sh %lu, size %d, pd_idx %d, state %ld.\n", sh->sector, sh->size, sh->pd_idx, sh->state); + printk("sh %lu, count %d.\n", sh->sector, atomic_read(&sh->count)); printk("sh %lu, ", sh->sector); for (i = 0; i < MD_SB_DISKS; i++) { - if (sh->bh_old[i]) - printk("(old%d: %p) ", i, sh->bh_old[i]); - if (sh->bh_new[i]) - printk("(new%d: %p) ", i, sh->bh_new[i]); - if (sh->bh_copy[i]) - printk("(copy%d: %p) ", i, sh->bh_copy[i]); - if (sh->bh_req[i]) - printk("(req%d: %p) ", i, sh->bh_req[i]); + if (sh->bh_cache[i]) + printk("(cache%d: %p %ld) ", i, sh->bh_cache[i], sh->bh_cache[i]->b_state); } printk("\n"); - for (i = 0; i < MD_SB_DISKS; i++) - printk("%d(%d/%d) ", i, sh->cmd_new[i], sh->new[i]); - printk("\n"); } static void printall (raid5_conf_t *conf) @@ -2043,13 +1663,6 @@ static int raid5_status (char *page, mddev_t *mddev) #if RAID5_DEBUG #define D(x) \ sz += sprintf (page+sz, "<"#x":%d>", atomic_read(&conf->x)) - D(nr_handle); - D(nr_stripes); - D(nr_hashed_stripes); - D(nr_locked_stripes); - D(nr_pending_stripes); - D(nr_cached_stripes); - D(nr_free_sh); printall(conf); #endif return sz; @@ -2068,7 +1681,11 @@ static void print_raid5_conf (raid5_conf_t *conf) printk(" --- rd:%d wd:%d fd:%d\n", conf->raid_disks, conf->working_disks, conf->failed_disks); +#if RAID5_DEBUG for (i = 0; i < MD_SB_DISKS; i++) { +#else + for (i = 0; i < conf->working_disks+conf->failed_disks; i++) { +#endif tmp = conf->disks + i; printk(" disk %d, s:%d, o:%d, n:%d rd:%d us:%d dev:%s\n", i, tmp->spare,tmp->operational, diff --git a/drivers/media/Makefile b/drivers/media/Makefile index 99517e8123dc..4f89d208ad37 100644 --- a/drivers/media/Makefile +++ b/drivers/media/Makefile @@ -10,6 +10,7 @@ # subdir-y := video radio +mod-subdirs := 
video radio O_TARGET := media.o obj-y := $(join $(subdir-y),$(subdir-y:%=/%.o)) diff --git a/drivers/net/pcmcia/Makefile b/drivers/net/pcmcia/Makefile index baf9c19ae167..1258504de65a 100644 --- a/drivers/net/pcmcia/Makefile +++ b/drivers/net/pcmcia/Makefile @@ -37,9 +37,9 @@ obj-$(CONFIG_PCMCIA_IBMTR) += ibmtr_cs.o include $(TOPDIR)/Rules.make -.ibmtr.o: ../tokenring/ibmtr.c +tmp-ibmtr.o: ../tokenring/ibmtr.c $(CC) $(CFLAGS) -D__NO_VERSION__ -DPCMCIA -c -o $@ ../tokenring/ibmtr.c -ibmtr_cs.o: .ibmtr.o ibmtr_cs.c - $(CC) $(CFLAGS) -DPCMCIA -c -o .$@ ibmtr_cs.c - $(LD) -r -o $@ .$@ .ibmtr.o +ibmtr_cs.o: tmp-ibmtr.o ibmtr_cs.c + $(CC) $(CFLAGS) -DPCMCIA -c -o tmp-$@ ibmtr_cs.c + $(LD) -r -o $@ tmp-$@ tmp-ibmtr.o diff --git a/drivers/net/sis900.c b/drivers/net/sis900.c index 72949ff74fa2..f58512d86076 100644 --- a/drivers/net/sis900.c +++ b/drivers/net/sis900.c @@ -1,7 +1,7 @@ /* sis900.c: A SiS 900/7016 PCI Fast Ethernet driver for Linux. Copyright 1999 Silicon Integrated System Corporation - Revision: 1.07.06 Nov. 7 2000 - + Revision: 1.07.07 Nov. 29 2000 + Modified from the driver which is originally written by Donald Becker. This software may be used and distributed according to the terms @@ -18,12 +18,13 @@ preliminary Rev. 1.0 Jan. 18, 1998 http://www.sis.com.tw/support/databook.htm + Rev 1.07.07 Nov. 29 2000 Lei-Chun Chang added kernel-doc extractable documentation and 630 workaround fix Rev 1.07.06 Nov. 7 2000 Jeff Garzik some bug fix and cleaning Rev 1.07.05 Nov. 6 2000 metapirat contribute media type select by ifconfig Rev 1.07.04 Sep. 6 2000 Lei-Chun Chang added ICS1893 PHY support - Rev 1.07.03 Aug. 24 2000 Lei-Chun Chang (lcchang@sis.com.tw) modified 630E eqaulizer workaroung rule + Rev 1.07.03 Aug. 24 2000 Lei-Chun Chang (lcchang@sis.com.tw) modified 630E eqaulizer workaround rule Rev 1.07.01 Aug. 08 2000 Ollie Lho minor update for SiS 630E and SiS 630E A1 - Rev 1.07 Mar. 07 2000 Ollie Lho bug fix in Rx buffer ring + Rev 1.07 Mar. 
07 2000 Ollie Lho bug fix in Rx buffer ring Rev 1.06.04 Feb. 11 2000 Jeff Garzik softnet and init for kernel 2.4 Rev 1.06.03 Dec. 23 1999 Ollie Lho Third release Rev 1.06.02 Nov. 23 1999 Ollie Lho bug in mac probing fixed @@ -58,7 +59,7 @@ #include "sis900.h" static const char *version = -"sis900.c: v1.07.06 11/07/2000\n"; +"sis900.c: v1.07.07 11/29/2000\n"; static int max_interrupt_work = 20; static int multicast_filter_limit = 128; @@ -169,10 +170,18 @@ static struct net_device_stats *sis900_get_stats(struct net_device *net_dev); static u16 sis900_compute_hashtable_index(u8 *addr); static void set_rx_mode(struct net_device *net_dev); static void sis900_reset(struct net_device *net_dev); -static void sis630e_set_eq(struct net_device *net_dev); +static void sis630_set_eq(struct net_device *net_dev, u8 revision); static int sis900_set_config(struct net_device *dev, struct ifmap *map); -/* older SiS900 and friends, use EEPROM to store MAC address */ +/** + * sis900_get_mac_addr: - Get MAC address for stand alone SiS900 model + * @pci_dev: the sis900 pci device + * @net_dev: the net device to get address for + * + * Older SiS900 and friends, use EEPROM to store MAC address. + * MAC address is read from read_eeprom() into @net_dev->dev_addr. + */ + static int __devinit sis900_get_mac_addr(struct pci_dev * pci_dev, struct net_device *net_dev) { long ioaddr = pci_resource_start(pci_dev, 0); @@ -194,7 +203,16 @@ static int __devinit sis900_get_mac_addr(struct pci_dev * pci_dev, struct net_de return 1; } -/* SiS630E model, use APC CMOS RAM to store MAC address */ +/** + * sis630e_get_mac_addr: - Get MAC address for SiS630E model + * @pci_dev: the sis900 pci device + * @net_dev: the net device to get address for + * + * SiS630E model, use APC CMOS RAM to store MAC address. + * APC CMOS RAM is accessed through ISA bridge. + * MAC address is read into @net_dev->dev_addr. 
+ */ + static int __devinit sis630e_get_mac_addr(struct pci_dev * pci_dev, struct net_device *net_dev) { struct pci_dev *isa_bridge = NULL; @@ -217,6 +235,17 @@ static int __devinit sis630e_get_mac_addr(struct pci_dev * pci_dev, struct net_d return 1; } +/** + * sis900_probe: - Probe for sis900 device + * @pci_dev: the sis900 pci device + * @pci_id: the pci device ID + * + * Check and probe sis900 net device for @pci_dev. + * Get mac address according to the chip revision, + * and assign SiS900-specific entries in the device structure. + * ie: sis900_open(), sis900_start_xmit(), sis900_close(), etc. + */ + static int __devinit sis900_probe (struct pci_dev *pci_dev, const struct pci_device_id *pci_id) { struct sis900_private *sis_priv; @@ -241,7 +270,6 @@ static int __devinit sis900_probe (struct pci_dev *pci_dev, const struct pci_dev net_dev = init_etherdev(NULL, sizeof(struct sis900_private)); if (!net_dev) return -ENOMEM; - SET_MODULE_OWNER(net_dev); if (!request_region(ioaddr, SIS900_TOTAL_SIZE, net_dev->name)) { printk(KERN_ERR "sis900.c: can't allocate I/O space at 0x%lX\n", ioaddr); @@ -250,9 +278,9 @@ static int __devinit sis900_probe (struct pci_dev *pci_dev, const struct pci_dev } pci_read_config_byte(pci_dev, PCI_CLASS_REVISION, &revision); - if (revision == SIS630E_REV || revision == SIS630EA1_REV) + if (revision == SIS630E_900_REV || revision == SIS630EA1_900_REV) ret = sis630e_get_mac_addr(pci_dev, net_dev); - else if (revision == SIS630S_REV) + else if (revision == SIS630S_900_REV) ret = sis630e_get_mac_addr(pci_dev, net_dev); else ret = sis900_get_mac_addr(pci_dev, net_dev); @@ -307,6 +335,15 @@ err_out: return ret; } +/** + * sis900_mii_probe: - Probe MII PHY for sis900 + * @net_dev: the net device to probe for + * + * Search for total of 32 possible mii phy addresses. + * Identify and set current phy if found one, + * return error if it failed to found. 
+ */ + static int __init sis900_mii_probe (struct net_device * net_dev) { struct sis900_private * sis_priv = (struct sis900_private *)net_dev->priv; @@ -364,7 +401,7 @@ static int __init sis900_mii_probe (struct net_device * net_dev) sis_priv->mii->chip_info->name); pci_read_config_byte(sis_priv->pci_dev, PCI_CLASS_REVISION, &revision); - if (revision == SIS630E_REV) { + if (revision == SIS630E_900_REV) { /* SiS 630E has some bugs on default value of PHY registers */ mdio_write(net_dev, sis_priv->cur_phy, MII_ANADV, 0x05e1); mdio_write(net_dev, sis_priv->cur_phy, MII_CONFIG1, 0x22); @@ -384,8 +421,15 @@ static int __init sis900_mii_probe (struct net_device * net_dev) /* Delay between EEPROM clock transitions. */ #define eeprom_delay() inl(ee_addr) -/* Read Serial EEPROM through EEPROM Access Register, Note that location is - in word (16 bits) unit */ +/** + * read_eeprom: - Read Serial EEPROM + * @ioaddr: base i/o address + * @location: the EEPROM location to read + * + * Read Serial EEPROM through EEPROM Access Register. + * Note that location is in units of 16-bit words + */ + static u16 read_eeprom(long ioaddr, int location) { int i; @@ -453,6 +497,17 @@ static void mdio_reset(long mdio_addr) return; } +/** + * mdio_read: - read MII PHY register + * @net_dev: the net device to read + * @phy_id: the phy address to read + * @location: the phy register id to read + * + * Read MII registers through MDIO and MDC + * using MDIO management frame structure and protocol(defined by ISO/IEC). 
+ * Please see SiS7014 or ICS spec + */ + static u16 mdio_read(struct net_device *net_dev, int phy_id, int location) { long mdio_addr = net_dev->base_addr + mear; @@ -484,6 +539,18 @@ static u16 mdio_read(struct net_device *net_dev, int phy_id, int location) return retval; } +/** + * mdio_write: - write MII PHY register + * @net_dev: the net device to write + * @phy_id: the phy address to write + * @location: the phy register id to write + * @value: the register value to write with + * + * Write MII registers with @value through MDIO and MDC + * using MDIO management frame structure and protocol(defined by ISO/IEC) + * please see SiS7014 or ICS spec + */ + static void mdio_write(struct net_device *net_dev, int phy_id, int location, int value) { long mdio_addr = net_dev->base_addr + mear; @@ -525,25 +592,36 @@ static void mdio_write(struct net_device *net_dev, int phy_id, int location, int return; } +/** + * sis900_open: - open sis900 device + * @net_dev: the net device to open + * + * Do some initialization and start net interface. + * enable interrupts and set sis900 timer. + */ + static int sis900_open(struct net_device *net_dev) { struct sis900_private *sis_priv = (struct sis900_private *)net_dev->priv; long ioaddr = net_dev->base_addr; u8 revision; - int ret; + + MOD_INC_USE_COUNT; /* Soft reset the chip. 
*/ sis900_reset(net_dev); - /* Equalizer workaroung Rule */ + /* Equalizer workaround Rule */ pci_read_config_byte(sis_priv->pci_dev, PCI_CLASS_REVISION, &revision); - if (revision == SIS630E_REV || revision == SIS630EA1_REV) - sis630e_set_eq(net_dev); + if (revision == SIS630E_900_REV || revision == SIS630EA1_900_REV || + revision == SIS630A_900_REV) + sis630_set_eq(net_dev,revision); - ret = request_irq(net_dev->irq, &sis900_interrupt, SA_SHIRQ, net_dev->name, net_dev); - if (ret) - return ret; + if (request_irq(net_dev->irq, &sis900_interrupt, SA_SHIRQ, net_dev->name, net_dev)) { + MOD_DEC_USE_COUNT; + return -EAGAIN; + } sis900_init_rxfilter(net_dev); @@ -572,7 +650,14 @@ sis900_open(struct net_device *net_dev) return 0; } -/* set receive filter address to our MAC address */ +/** + * sis900_init_rxfilter: - Initialize the Rx filter + * @net_dev: the net device to initialize for + * + * Set receive filter address to our MAC address + * and enable packet filtering. + */ + static void sis900_init_rxfilter (struct net_device * net_dev) { @@ -603,7 +688,13 @@ sis900_init_rxfilter (struct net_device * net_dev) outl(rfcrSave | RFEN, rfcr + ioaddr); } -/* Initialize the Tx ring. 
*/ +/** + * sis900_init_tx_ring: - Initialize the Tx descriptor ring + * @net_dev: the net device to initialize for + * + * Initialize the Tx descriptor ring. + */ + static void sis900_init_tx_ring(struct net_device *net_dev) { @@ -630,7 +721,14 @@ sis900_init_tx_ring(struct net_device *net_dev) net_dev->name, inl(ioaddr + txdp)); } -/* Initialize the Rx descriptor ring, pre-allocate recevie buffers */ +/** + * sis900_init_rx_ring: - Initialize the Rx descriptor ring + * @net_dev: the net device to initialize for + * + * Initialize the Rx descriptor ring, + * and pre-allocate receive buffers (socket buffer) + */ + static void sis900_init_rx_ring(struct net_device *net_dev) { @@ -676,51 +774,79 @@ sis900_init_rx_ring(struct net_device *net_dev) net_dev->name, inl(ioaddr + rxdp)); } -/* 630E equalizer workaroung rule(Cyrus Huang 08/15) - PHY register 14h(Test) - Bit 14: 0 -- Automatically dectect (default) - 1 -- Manually set Equalizer filter - Bit 13: 0 -- (Default) - 1 -- Speed up convergence of equalizer setting - Bit 9 : 0 -- (Default) - 1 -- Disable Baseline Wander - Bit 3~7 -- Equalizer filter setting - - Link ON: Set Bit 9, 13 to 1, Bit 14 to 0 - Then calculate equalizer value - Then set equalizer value, and set Bit 14 to 1, Bit 9 to 0 - Link Off:Set Bit 13 to 1, Bit 14 to 0 - - Calculate Equalizer value: - When Link is ON and Bit 14 is 0, SIS900PHY will auto-dectect proper equalizer value. - When the equalizer is stable, this value is not a fixed value. It will be within - a small range(eg. 7~9). Then we get a minimum and a maximum value(eg. 
min=7, max=9) - 0 <= max <= 4 --> set equalizer to max - 5 <= max <= 14 --> set equalizer to max+1 or - set equalizer to max+2 if max == min - max >= 15 --> set equalizer to max+5 or - set equalizer to max+6 if max == min -*/ -static void sis630e_set_eq(struct net_device *net_dev) +/** + * sis630_set_eq: - set phy equalizer value for 630 LAN + * @net_dev: the net device to set equalizer value + * @revision: 630 LAN revision number + * + * 630E equalizer workaround rule(Cyrus Huang 08/15) + * PHY register 14h(Test) + * Bit 14: 0 -- Automatically detect (default) + * 1 -- Manually set Equalizer filter + * Bit 13: 0 -- (Default) + * 1 -- Speed up convergence of equalizer setting + * Bit 9 : 0 -- (Default) + * 1 -- Disable Baseline Wander + * Bit 3~7 -- Equalizer filter setting + * Link ON: Set Bit 9, 13 to 1, Bit 14 to 0 + * Then calculate equalizer value + * Then set equalizer value, and set Bit 14 to 1, Bit 9 to 0 + * Link Off: Set Bit 13 to 1, Bit 14 to 0 + * Calculate Equalizer value: + * When Link is ON and Bit 14 is 0, SIS900PHY will auto-detect proper equalizer value. + * When the equalizer is stable, this value is not a fixed value. It will be within + * a small range(eg. 7~9). Then we get a minimum and a maximum value(eg. 
min=7, max=9) + * 0 <= max <= 4 --> set equalizer to max + * 5 <= max <= 14 --> set equalizer to max+1 or set equalizer to max+2 if max == min + * max >= 15 --> set equalizer to max+5 or set equalizer to max+6 if max == min + */ + +static void sis630_set_eq(struct net_device *net_dev, u8 revision) { struct sis900_private *sis_priv = (struct sis900_private *)net_dev->priv; u16 reg14h, eq_value, max_value=0, min_value=0; + u8 host_bridge_rev; int i, maxcount=10; + struct pci_dev *dev=NULL; + + if ((dev = pci_find_device(SIS630_DEVICE_ID, SIS630_VENDOR_ID, dev))) + pci_read_config_byte(dev, PCI_CLASS_REVISION, &host_bridge_rev); if (netif_carrier_ok(net_dev)) { reg14h=mdio_read(net_dev, sis_priv->cur_phy, MII_RESV); mdio_write(net_dev, sis_priv->cur_phy, MII_RESV, (0x2200 | reg14h) & 0xBFFF); for (i=0; i < maxcount; i++) { eq_value=(0x00F8 & mdio_read(net_dev, sis_priv->cur_phy, MII_RESV)) >> 3; + if (i == 0) + max_value=min_value=eq_value; max_value=(eq_value > max_value) ? eq_value : max_value; min_value=(eq_value < min_value) ? eq_value : min_value; - } - if (max_value < 5) - eq_value=max_value; - else if (max_value >= 5 && max_value < 15) - eq_value=(max_value == min_value) ? max_value+2 : max_value+1; - else if (max_value >= 15) - eq_value=(max_value == min_value) ? max_value+6 : max_value+5; + } + /* 630E rule to determine the equalizer value */ + if (revision == SIS630E_900_REV || revision == SIS630EA1_900_REV) { + if (max_value < 5) + eq_value=max_value; + else if (max_value >= 5 && max_value < 15) + eq_value=(max_value == min_value) ? max_value+2 : max_value+1; + else if (max_value >= 15) + eq_value=(max_value == min_value) ? 
max_value+6 : max_value+5; + } + /* 630A0 rule to determine the equalizer value */ + if (revision == SIS630A_900_REV && host_bridge_rev == SIS630A0) { + if (max_value < 5) + eq_value=max_value+3; + else if (max_value >= 5) + eq_value=max_value+5; + } + /* 630B0&B1 rule to determine the equalizer value */ + if (revision == SIS630A_900_REV && + (host_bridge_rev == SIS630B0 || host_bridge_rev == SIS630B1)) { + if (max_value == 0) + eq_value=3; + else + eq_value=(max_value+min_value+1)/2; + } + /* write equalizer value and setting */ reg14h=mdio_read(net_dev, sis_priv->cur_phy, MII_RESV); reg14h=(reg14h & 0xFF07) | ((eq_value << 3) & 0x00F8); reg14h=(reg14h | 0x6000) & 0xFDFF; @@ -733,9 +859,14 @@ static void sis630e_set_eq(struct net_device *net_dev) return; } -/* on each timer ticks we check two things, Link Status (ON/OFF) and - Link Mode (10/100/Full/Half) +/** + * sis900_timer: - sis900 timer routine + * @data: pointer to sis900 net device + * + * On each timer ticks we check two things, + * link status (ON/OFF) and link mode (10/100/Full/Half) */ + static void sis900_timer(unsigned long data) { struct net_device *net_dev = (struct net_device *)data; @@ -756,10 +887,11 @@ static void sis900_timer(unsigned long data) next_tick = HZ; netif_carrier_off(net_dev); - /* Equalizer workaroung Rule */ + /* Equalizer workaround Rule */ pci_read_config_byte(sis_priv->pci_dev, PCI_CLASS_REVISION, &revision); - if (revision == SIS630E_REV || revision == SIS630EA1_REV) - sis630e_set_eq(net_dev); + if (revision == SIS630E_900_REV || revision == SIS630EA1_900_REV || + revision == SIS630A_900_REV) + sis630_set_eq(net_dev, revision); printk(KERN_INFO "%s: Media Link Off\n", net_dev->name); @@ -777,10 +909,11 @@ static void sis900_timer(unsigned long data) netif_carrier_on(net_dev); next_tick = 5*HZ; - /* Equalizer workaroung Rule */ + /* Equalizer workaround Rule */ pci_read_config_byte(sis_priv->pci_dev, PCI_CLASS_REVISION, &revision); - if (revision == SIS630E_REV || revision == 
SIS630EA1_REV) - sis630e_set_eq(net_dev); + if (revision == SIS630E_900_REV || revision == SIS630EA1_900_REV || + revision == SIS630A_900_REV) + sis630_set_eq(net_dev, revision); /* change what cur_phy means */ if (mii_phy->phy_addr != sis_priv->cur_phy) { @@ -802,6 +935,16 @@ static void sis900_timer(unsigned long data) sis_priv->timer.expires = jiffies + next_tick; add_timer(&sis_priv->timer); } + +/** + * sis900_check_mode: - check the media mode for sis900 + * @net_dev: the net device to be checked + * @mii_phy: the mii phy + * + * call mii_phy->chip_info->read_mode function + * to check the speed and duplex mode for sis900 + */ + static void sis900_check_mode (struct net_device *net_dev, struct mii_phy *mii_phy) { struct sis900_private *sis_priv = (struct sis900_private *)net_dev->priv; @@ -831,6 +974,18 @@ static void sis900_check_mode (struct net_device *net_dev, struct mii_phy *mii_p outl (tx_flags, ioaddr + txcfg); outl (rx_flags, ioaddr + rxcfg); } + +/** + * sis900_read_mode: - read media mode for sis900 internal phy + * @net_dev: the net device to read mode for + * @phy_addr: mii phy address + * @speed: the transmit speed to be determined + * @duplex: the duplex mode to be determined + * + * read MII_STSOUT register from sis900 internal phy + * to determine the speed and duplex mode for sis900 + */ + static void sis900_read_mode(struct net_device *net_dev, int phy_addr, int *speed, int *duplex) { int i = 0; @@ -860,6 +1015,18 @@ static void sis900_read_mode(struct net_device *net_dev, int phy_addr, int *spee *duplex == FDX_CAPABLE_FULL_SELECTED ? 
"full" : "half"); } + +/** + * amd79c901_read_mode: - read media mode for amd79c901 phy + * @net_dev: the net device to read mode for + * @phy_addr: mii phy address + * @speed: the transmit speed to be determined + * @duplex: the duplex mode to be determined + * + * read MII_STATUS register from amd79c901 phy + * to determine the speed and duplex mode for sis900 + */ + static void amd79c901_read_mode(struct net_device *net_dev, int phy_addr, int *speed, int *duplex) { int i; @@ -902,7 +1069,18 @@ static void amd79c901_read_mode(struct net_device *net_dev, int phy_addr, int *s printk(KERN_INFO "%s: Media Link Off\n", net_dev->name); } } -/* ICS1893 PHY use Quick Poll Detailed Status Register to get its status */ + +/** + * ics1893_read_mode: - read media mode for ICS1893 PHY + * @net_dev: the net device to read mode for + * @phy_addr: mii phy address + * @speed: the transmit speed to be determined + * @duplex: the duplex mode to be determined + * + * ICS1893 PHY use Quick Poll Detailed Status register + * to determine the speed and duplex mode for sis900 + */ + static void ics1893_read_mode(struct net_device *net_dev, int phy_addr, int *speed, int *duplex) { int i = 0; @@ -933,6 +1111,14 @@ static void ics1893_read_mode(struct net_device *net_dev, int phy_addr, int *spe printk(KERN_INFO "%s: Media Link Off\n", net_dev->name); } +/** + * sis900_tx_timeout: - sis900 transmit timeout routine + * @net_dev: the net device to transmit + * + * print transmit timeout status + * disable interrupts and do some tasks + */ + static void sis900_tx_timeout(struct net_device *net_dev) { struct sis900_private *sis_priv = (struct sis900_private *)net_dev->priv; @@ -975,6 +1161,16 @@ static void sis900_tx_timeout(struct net_device *net_dev) return; } +/** + * sis900_start_xmit: - sis900 start transmit routine + * @skb: socket buffer pointer to put the data being transmitted + * @net_dev: the net device to transmit with + * + * Set the transmit buffer descriptor, + * and write TxENA 
to enable transmit state machine. + * tell upper layer if the buffer is full + */ + static int sis900_start_xmit(struct sk_buff *skb, struct net_device *net_dev) { @@ -1015,8 +1211,16 @@ sis900_start_xmit(struct sk_buff *skb, struct net_device *net_dev) return 0; } -/* The interrupt handler does all of the Rx thread work and cleans up - after the Tx thread. */ +/** + * sis900_interrupt: - sis900 interrupt handler + * @irq: the irq number + * @dev_instance: the client data object + * @regs: snapshot of processor context + * + * The interrupt handler does all of the Rx thread work, + * and cleans up after the Tx thread + */ + static void sis900_interrupt(int irq, void *dev_instance, struct pt_regs *regs) { struct net_device *net_dev = (struct net_device *)dev_instance; @@ -1066,8 +1270,16 @@ static void sis900_interrupt(int irq, void *dev_instance, struct pt_regs *regs) return; } -/* Process receive interrupt events, put buffer to higher layer and refill buffer pool - Note: This fucntion is called by interrupt handler, don't do "too much" work here */ +/** + * sis900_rx: - sis900 receive routine + * @net_dev: the net device which receives data + * + * Process receive interrupt events, + * put buffer to higher layer and refill buffer pool + * Note: This function is called by interrupt handler, + * don't do "too much" work here + */ + static int sis900_rx(struct net_device *net_dev) { struct sis900_private *sis_priv = (struct sis900_private *)net_dev->priv; @@ -1187,8 +1399,16 @@ static int sis900_rx(struct net_device *net_dev) return 0; } -/* finish up transmission of packets, check for error condition and free skbuff etc. 
- Note: This fucntion is called by interrupt handler, don't do "too much" work here */ +/** + * sis900_finish_xmit: - finish up transmission of packets + * @net_dev: the net device to be transmitted on + * + * Check for error condition and free socket buffer etc + * schedule for more transmission as needed + * Note: This function is called by interrupt handler, + * don't do "too much" work here + */ + static void sis900_finish_xmit (struct net_device *net_dev) { struct sis900_private *sis_priv = (struct sis900_private *)net_dev->priv; @@ -1244,6 +1464,14 @@ static void sis900_finish_xmit (struct net_device *net_dev) } } +/** + * sis900_close: - close sis900 device + * @net_dev: the net device to be closed + * + * Disable interrupts, stop the Tx and Rx state machines, + * free Tx and RX socket buffer + */ + static int sis900_close(struct net_device *net_dev) { @@ -1278,9 +1506,20 @@ sis900_close(struct net_device *net_dev) /* Green! Put the chip in low-power mode. */ + MOD_DEC_USE_COUNT; + return 0; } +/** + * mii_ioctl: - process MII i/o control command + * @net_dev: the net device to command for + * @rq: parameter for command + * @cmd: the i/o command + * + * Process MII command like read/write MII register + */ + static int mii_ioctl(struct net_device *net_dev, struct ifreq *rq, int cmd) { struct sis900_private *sis_priv = (struct sis900_private *)net_dev->priv; @@ -1303,6 +1542,13 @@ static int mii_ioctl(struct net_device *net_dev, struct ifreq *rq, int cmd) } } +/** + * sis900_get_stats: - Get sis900 read/write statistics + * @net_dev: the net device to get statistics for + * + * get tx/rx statistics for sis900 + */ + static struct net_device_stats * sis900_get_stats(struct net_device *net_dev) { @@ -1311,7 +1557,16 @@ sis900_get_stats(struct net_device *net_dev) return &sis_priv->stats; } -/* Support for media type changes via net_device->set_config */ +/** + * sis900_set_config: - Set media type by net_device.set_config + * @dev: the net device for media type 
change + * @map: ifmap passed by ifconfig + * + * Set media type to 10baseT, 100baseT or 0(for auto) by ifconfig + * we support only port changes. All other runtime configuration + * changes will be ignored + */ + static int sis900_set_config(struct net_device *dev, struct ifmap *map) { struct sis900_private *sis_priv = (struct sis900_private *)dev->priv; @@ -1319,8 +1574,8 @@ static int sis900_set_config(struct net_device *dev, struct ifmap *map) u16 status; - /* we support only port changes. All other runtime configuration - changes will be ignored (io base and interrupt changes for example)*/ + /* + )*/ if ((map->port != (u_char)(-1)) && (map->port != dev->if_port)) { /* we switch on the ifmap->port field. I couldn't find anything like a definition or standard for the values of that field. @@ -1401,8 +1656,14 @@ static int sis900_set_config(struct net_device *dev, struct ifmap *map) return 0; } -/* SiS 900 uses the most sigificant 7 bits to index a 128 bits multicast hash table, which makes - this function a little bit different from other drivers */ +/** + * sis900_compute_hashtable_index: - compute hashtable index + * @addr: multicast address + * + * SiS 900 uses the most significant 7 bits to index a 128 bits multicast + * hash table, which makes this function a little bit different from other drivers + */ + static u16 sis900_compute_hashtable_index(u8 *addr) { @@ -1430,6 +1691,14 @@ static u16 sis900_compute_hashtable_index(u8 *addr) return ((int)(crc >> 25)); } +/** + * set_rx_mode: - Set SiS900 receive mode + * @net_dev: the net device to be set + * + * Set SiS900 receive mode for promiscuous, multicast, or broadcast mode. + * And set the appropriate multicast filter.
+ */ + static void set_rx_mode(struct net_device *net_dev) { long ioaddr = net_dev->base_addr; @@ -1486,6 +1755,14 @@ static void set_rx_mode(struct net_device *net_dev) return; } +/** + * sis900_reset: - Reset sis900 MAC + * @net_dev: the net device to reset + * + * reset sis900 MAC and wait until finished + * reset through command register + */ + static void sis900_reset(struct net_device *net_dev) { long ioaddr = net_dev->base_addr; @@ -1506,6 +1783,13 @@ static void sis900_reset(struct net_device *net_dev) outl(PESEL, ioaddr + cfg); } +/** + * sis900_remove: - Remove sis900 device + * @pci_dev: the pci device to be removed + * + * remove and release SiS900 net device + */ + static void __devexit sis900_remove(struct pci_dev *pci_dev) { struct net_device *net_dev = pci_dev->driver_data; diff --git a/drivers/net/sis900.h b/drivers/net/sis900.h index 561a40444d9a..d446b5a788df 100644 --- a/drivers/net/sis900.h +++ b/drivers/net/sis900.h @@ -231,9 +231,14 @@ enum mii_stssum_register_bits { MII_STSSUM_AUTO = 0x0002, MII_STSSUM_SPD = 0x0001 }; +enum sis900_revision_id { + SIS630A_900_REV = 0x80, SIS630E_900_REV = 0x81, + SIS630S_900_REV = 0x82, SIS630EA1_900_REV = 0x83 +}; + enum sis630_revision_id { - SIS630E_REV = 0x81, SIS630EA1_REV = 0x83, - SIS630S_REV = 0x82 + SIS630A0 = 0x00, SIS630A1 = 0x01, + SIS630B0 = 0x10, SIS630B1 = 0x11 }; #define FDX_CAPABLE_DUPLEX_UNKNOWN 0 @@ -257,7 +262,9 @@ enum sis630_revision_id { /* PCI stuff, should be move to pic.h */ #define PCI_DEVICE_ID_SI_900 0x900 -#define PCI_DEVICE_ID_SI_7016 0x7016 +#define PCI_DEVICE_ID_SI_7016 0x7016 +#define SIS630_VENDOR_ID 0x0630 +#define SIS630_DEVICE_ID 0x1039 /* ioctl for accessing MII transceiver */ #define SIOCGMIIPHY (SIOCDEVPRIVATE) /* Get the PHY in use. 
*/ diff --git a/drivers/pci/pci.ids b/drivers/pci/pci.ids index df9937fd2123..84c0e03647de 100644 --- a/drivers/pci/pci.ids +++ b/drivers/pci/pci.ids @@ -4598,6 +4598,8 @@ 250f 82820 820 (Camino) Chipset PCI to AGP Bridge 2520 82805AA MTH Memory Translator Hub 2521 82804AA MRH-S Memory Repeater Hub for SDRAM + 2530 82850 850 (Tehama) Chipset Host Bridge (MCH) + 2532 82850 850 (Tehama) Chipset AGP Bridge 5200 EtherExpress PRO/100 5201 EtherExpress PRO/100 8086 0001 EtherExpress PRO/100 Server Ethernet Adapter diff --git a/drivers/sbus/Makefile b/drivers/sbus/Makefile index 26dd66f22c7b..b0b73c368c90 100644 --- a/drivers/sbus/Makefile +++ b/drivers/sbus/Makefile @@ -8,7 +8,9 @@ # Note 2! The CFLAGS definitions are now in the main makefile... O_TARGET := sbus_all.o +ifneq ($(ARCH),m68k) obj-y := sbus.o dvma.o +endif subdir-y += char subdir-m += char diff --git a/drivers/zorro/Makefile b/drivers/zorro/Makefile index 363e699eab88..f5a035c9b769 100644 --- a/drivers/zorro/Makefile +++ b/drivers/zorro/Makefile @@ -9,18 +9,12 @@ # parent makefile. # -L_TARGET := zorro.a +O_TARGET := driver.o -# Nasty trick as we need to link files with no references from the outside. -O_TARGET := zorro_core.o -L_OBJS := zorro_core.o -OX_OBJS := zorro.o +export-objs := zorro.o -ifdef CONFIG_PROC_FS -O_OBJS += proc.o -endif - -L_OBJS += names.o +obj-$(CONFIG_ZORRO) += zorro.o names.o +obj-$(CONFIG_PROC_FS) += proc.o include $(TOPDIR)/Rules.make diff --git a/fs/buffer.c b/fs/buffer.c index 377d9f22fde1..b11c9e3ea5ac 100644 --- a/fs/buffer.c +++ b/fs/buffer.c @@ -639,8 +639,13 @@ void __invalidate_buffers(kdev_t dev, int destroy_dirty_buffers) continue; for (i = nr_buffers_type[nlist]; i > 0 ; bh = bh_next, i--) { bh_next = bh->b_next_free; + + /* Another device? */ if (bh->b_dev != dev) continue; + /* Part of a mapping? 
*/ + if (bh->b_page->mapping) + continue; if (buffer_locked(bh)) { atomic_inc(&bh->b_count); spin_unlock(&lru_list_lock); @@ -1512,13 +1517,13 @@ static int __block_write_full_page(struct inode *inode, struct page *page, get_b block++; } while (bh != head); - /* Stage 2: lock the buffers, mark them dirty */ + /* Stage 2: lock the buffers, mark them clean */ do { lock_buffer(bh); bh->b_end_io = end_buffer_io_async; atomic_inc(&bh->b_count); set_bit(BH_Uptodate, &bh->b_state); - set_bit(BH_Dirty, &bh->b_state); + clear_bit(BH_Dirty, &bh->b_state); bh = bh->b_this_page; } while (bh != head); @@ -2093,7 +2098,7 @@ int brw_kiovec(int rw, int nr, struct kiobuf *iovec[], if (rw == WRITE) { set_bit(BH_Uptodate, &tmp->b_state); - set_bit(BH_Dirty, &tmp->b_state); + clear_bit(BH_Dirty, &tmp->b_state); } bh[bhind++] = tmp; diff --git a/fs/exec.c b/fs/exec.c index 695280bc9c51..43f3321c4c83 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -313,9 +313,7 @@ int setup_arg_pages(struct linux_binprm *bprm) mpnt->vm_pgoff = 0; mpnt->vm_file = NULL; mpnt->vm_private_data = (void *) 0; - spin_lock(&current->mm->page_table_lock); insert_vm_struct(current->mm, mpnt); - spin_unlock(&current->mm->page_table_lock); current->mm->total_vm = (mpnt->vm_end - mpnt->vm_start) >> PAGE_SHIFT; } diff --git a/fs/namei.c b/fs/namei.c index 37644f4a1015..105833e4eac7 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -1611,7 +1611,7 @@ asmlinkage long sys_link(const char * oldname, const char * newname) goto out; error = -EXDEV; if (old_nd.mnt != nd.mnt) - goto out; + goto out_release; new_dentry = lookup_create(&nd, 0); error = PTR_ERR(new_dentry); if (!IS_ERR(new_dentry)) { @@ -1619,6 +1619,7 @@ asmlinkage long sys_link(const char * oldname, const char * newname) dput(new_dentry); } up(&nd.dentry->d_inode->i_sem); +out_release: path_release(&nd); out: path_release(&old_nd); diff --git a/fs/nfsd/nfs3xdr.c b/fs/nfsd/nfs3xdr.c index 4cf7c897c1a7..3c14a44c4d5b 100644 --- a/fs/nfsd/nfs3xdr.c +++ b/fs/nfsd/nfs3xdr.c @@ -35,14
+35,6 @@ static u32 nfs3_ftypes[] = { /* * XDR functions for basic NFS types */ -static inline u32 * -dec64(u32 *p, u64 *valp) -{ - *valp = ((u64) ntohl(*p++)) << 32; - *valp |= ntohl(*p++); - return p; -} - static inline u32 * encode_time3(u32 *p, time_t secs) { @@ -142,7 +134,7 @@ decode_sattr3(u32 *p, struct iattr *iap) u64 newsize; iap->ia_valid |= ATTR_SIZE; - p = dec64(p, &newsize); + p = xdr_decode_hyper(p, &newsize); if (newsize <= NFS_OFFSET_MAX) iap->ia_size = newsize; else @@ -343,7 +335,7 @@ nfs3svc_decode_readargs(struct svc_rqst *rqstp, u32 *p, struct nfsd3_readargs *args) { if (!(p = decode_fh(p, &args->fh)) - || !(p = dec64(p, &args->offset))) + || !(p = xdr_decode_hyper(p, &args->offset))) return 0; args->count = ntohl(*p++); @@ -355,7 +347,7 @@ nfs3svc_decode_writeargs(struct svc_rqst *rqstp, u32 *p, struct nfsd3_writeargs *args) { if (!(p = decode_fh(p, &args->fh)) - || !(p = dec64(p, &args->offset))) + || !(p = xdr_decode_hyper(p, &args->offset))) return 0; args->count = ntohl(*p++); @@ -471,7 +463,7 @@ nfs3svc_decode_readdirargs(struct svc_rqst *rqstp, u32 *p, { if (!(p = decode_fh(p, &args->fh))) return 0; - p = dec64(p, &args->cookie); + p = xdr_decode_hyper(p, &args->cookie); args->verf = p; p += 2; args->dircount = ~0; args->count = ntohl(*p++); @@ -485,7 +477,7 @@ nfs3svc_decode_readdirplusargs(struct svc_rqst *rqstp, u32 *p, { if (!(p = decode_fh(p, &args->fh))) return 0; - p = dec64(p, &args->cookie); + p = xdr_decode_hyper(p, &args->cookie); args->verf = p; p += 2; args->dircount = ntohl(*p++); args->count = ntohl(*p++); @@ -499,7 +491,7 @@ nfs3svc_decode_commitargs(struct svc_rqst *rqstp, u32 *p, { if (!(p = decode_fh(p, &args->fh))) return 0; - p = dec64(p, &args->offset); + p = xdr_decode_hyper(p, &args->offset); args->count = ntohl(*p++); return xdr_argsize_check(rqstp, p); diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c index b57b85ca1fdc..bd01b57f5c0c 100644 --- a/fs/nfsd/vfs.c +++ b/fs/nfsd/vfs.c @@ -359,8 +359,16 @@ static struct 
accessmap nfs3_diraccess[] = { }; static struct accessmap nfs3_anyaccess[] = { - /* XXX: should we try to cover read/write here for clients that - * rely on us to do their access checking for special files? */ + /* Some clients - Solaris 2.6 at least, make an access call + * to the server to check for access for things like /dev/null + * (which really, the server doesn't care about). So + * We provide simple access checking for them, looking + * mainly at mode bits + */ + { NFS3_ACCESS_READ, MAY_READ }, + { NFS3_ACCESS_EXECUTE, MAY_EXEC }, + { NFS3_ACCESS_MODIFY, MAY_WRITE }, + { NFS3_ACCESS_EXTEND, MAY_WRITE }, { 0, 0 } }; @@ -1501,12 +1509,17 @@ nfsd_permission(struct svc_export *exp, struct dentry *dentry, int acc) inode->i_uid, inode->i_gid, current->fsuid, current->fsgid); #endif - if (acc & (MAY_WRITE | MAY_SATTR | MAY_TRUNC)) { - if (EX_RDONLY(exp) || IS_RDONLY(inode)) - return nfserr_rofs; - if (/* (acc & MAY_WRITE) && */ IS_IMMUTABLE(inode)) - return nfserr_perm; - } + /* only care about readonly exports for files and + * directories. 
links don't have meaningful write access, + * and all else is local to the client + */ + if (S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode)) + if (acc & (MAY_WRITE | MAY_SATTR | MAY_TRUNC)) { + if (EX_RDONLY(exp) || IS_RDONLY(inode)) + return nfserr_rofs; + if (/* (acc & MAY_WRITE) && */ IS_IMMUTABLE(inode)) + return nfserr_perm; + } if ((acc & MAY_TRUNC) && IS_APPEND(inode)) return nfserr_perm; diff --git a/include/linux/acpi.h b/include/linux/acpi.h index c0f2130ceb87..ff1dcaf45551 100644 --- a/include/linux/acpi.h +++ b/include/linux/acpi.h @@ -169,6 +169,7 @@ enum ACPI_FACS, ACPI_XSDT, ACPI_PMTIMER, + ACPI_BATTERY, }; #define ACPI_SLP_TYP_DISABLED (~0UL) diff --git a/include/linux/agp_backend.h b/include/linux/agp_backend.h index 99df46b37a29..a9d0af58a6d9 100644 --- a/include/linux/agp_backend.h +++ b/include/linux/agp_backend.h @@ -47,6 +47,7 @@ enum chipset_type { INTEL_I810, INTEL_I815, INTEL_I840, + INTEL_I850, VIA_GENERIC, VIA_VP3, VIA_MVP3, diff --git a/include/linux/lvm.h b/include/linux/lvm.h index 2600a75a17a4..2fe9bcdfcb3f 100644 --- a/include/linux/lvm.h +++ b/include/linux/lvm.h @@ -1,12 +1,14 @@ /* + * include/linux/lvm.h * kernel/lvm.h + * tools/lib/lvm.h * - * Copyright (C) 1997 - 2000 Heinz Mauelshagen, Germany + * Copyright (C) 1997 - 2000 Heinz Mauelshagen, Sistina Software * * February-November 1997 * May-July 1998 * January-March,July,September,October,Dezember 1999 - * January 2000 + * January,February,July,November 2000 * * lvm is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -49,6 +51,12 @@ * 08/12/1999 - changed LVM_LV_SIZE_MAX macro to reflect current 1TB limit * 01/01/2000 - extended lv_v2 core structure by wait_queue member * 12/02/2000 - integrated Andrea Arcagnelli's snapshot work + * 18/02/2000 - seperated user and kernel space parts by + * #ifdef them with __KERNEL__ + * 08/03/2000 - implemented cluster/shared bits for vg_access + * 26/06/2000 - 
implemented snapshot persistency and resizing support + * 02/11/2000 - added hash table size member to lv structure + * 12/11/2000 - removed unneeded timestamp definitions * */ @@ -56,7 +64,10 @@ #ifndef _LVM_H_INCLUDE #define _LVM_H_INCLUDE -#define _LVM_H_VERSION "LVM 0.8final (15/2/2000)" +#define _LVM_KERNEL_H_VERSION "LVM 0.9 (13/11/2000)" + +#include +#include /* * preprocessor definitions @@ -64,8 +75,9 @@ /* if you like emergency reset code in the driver */ #define LVM_TOTAL_RESET +#ifdef __KERNEL__ #define LVM_GET_INODE -#undef LVM_HD_NAME +#undef LVM_HD_NAME /* display nice names in /proc/partitions */ /* lots of debugging output (see driver source) #define DEBUG_LVM_GET_INFO @@ -79,37 +91,50 @@ #define DEBUG_LVM_BLK_OPEN #define DEBUG_KFREE */ - -#include +#endif /* #ifdef __KERNEL__ */ #ifndef __KERNEL__ -#define ____NOT_KERNEL____ #define __KERNEL__ -#endif #include -#ifdef ____NOT_KERNEL____ -#undef ____NOT_KERNEL____ +#include #undef __KERNEL__ -#endif +#else +#include +#include +#endif /* #ifndef __KERNEL__ */ +#include #include -#if LINUX_VERSION_CODE >= KERNEL_VERSION ( 2, 3 ,0) +#ifdef __KERNEL__ #include -#else -#include -#endif - #include +#endif /* #ifdef __KERNEL__ */ + #include #if !defined ( LVM_BLK_MAJOR) || !defined ( LVM_CHAR_MAJOR) #error Bad include/linux/major.h - LVM MAJOR undefined #endif +#ifdef BLOCK_SIZE +#undef BLOCK_SIZE +#endif + +#ifdef CONFIG_ARCH_S390 +#define BLOCK_SIZE 4096 +#else +#define BLOCK_SIZE 1024 +#endif + +#ifndef SECTOR_SIZE +#define SECTOR_SIZE 512 +#endif #define LVM_STRUCT_VERSION 1 /* structure version */ +#define LVM_DIR_PREFIX "/dev/" + #ifndef min #define min(a,b) (((a)<(b))?(a):(b)) #endif @@ -119,26 +144,33 @@ /* set the default structure version */ #if ( LVM_STRUCT_VERSION == 1) -#define pv_t pv_v1_t -#define lv_t lv_v2_t -#define vg_t vg_v1_t -#define pv_disk_t pv_disk_v1_t -#define lv_disk_t lv_disk_v1_t -#define vg_disk_t vg_disk_v1_t -#define lv_exception_t lv_v2_exception_t +#define pv_t pv_v2_t 
+#define lv_t lv_v4_t +#define vg_t vg_v3_t +#define pv_disk_t pv_disk_v2_t +#define lv_disk_t lv_disk_v3_t +#define vg_disk_t vg_disk_v2_t +#define lv_block_exception_t lv_block_exception_v1_t +#define lv_COW_table_disk_t lv_COW_table_disk_v1_t #endif + /* - * i/o protocoll version + * i/o protocol version * * defined here for the driver and defined seperate in the - * user land LVM parts + * user land tools/lib/liblvm.h * */ -#define LVM_DRIVER_IOP_VERSION 6 +#define LVM_DRIVER_IOP_VERSION 10 #define LVM_NAME "lvm" +#define LVM_GLOBAL "global" +#define LVM_DIR "lvm" +#define LVM_VG_SUBDIR "VGs" +#define LVM_LV_SUBDIR "LVs" +#define LVM_PV_SUBDIR "PVs" /* * VG/LV indexing macros @@ -216,11 +248,12 @@ #define LVM_TIMESTAMP_DISK_SIZE 512L /* reserved for timekeeping */ /* name list of physical volumes on disk */ -#define LVM_PV_NAMELIST_DISK_BASE ( LVM_TIMESTAMP_DISK_BASE + \ +#define LVM_PV_UUIDLIST_DISK_BASE ( LVM_TIMESTAMP_DISK_BASE + \ LVM_TIMESTAMP_DISK_SIZE) /* now for the dynamically calculated parts of the VGDA */ -#define LVM_LV_DISK_OFFSET(a, b) ( (a)->lv_on_disk.base + sizeof ( lv_t) * b) +#define LVM_LV_DISK_OFFSET(a, b) ( (a)->lv_on_disk.base + \ + sizeof ( lv_disk_t) * b) #define LVM_DISK_SIZE(pv) ( (pv)->pe_on_disk.base + \ (pv)->pe_on_disk.size) #define LVM_PE_DISK_OFFSET(pe, pv) ( pe * pv->pe_size + \ @@ -250,21 +283,21 @@ * Should be a sufficient spectrum ;*) */ -/* This is the usable size of disk_pe_t.le_num !!! v v */ +/* This is the usable size of pe_disk_t.le_num !!! v v */ #define LVM_PE_T_MAX ( ( 1 << ( sizeof ( uint16_t) * 8)) - 2) -#define LVM_LV_SIZE_MAX(a) ( ( long long) LVM_PE_T_MAX * (a)->pe_size > ( long long) 2*1024*1024*1024 ? 
( long long) 2*1024*1024*1024 : ( long long) LVM_PE_T_MAX * (a)->pe_size) -#define LVM_MIN_PE_SIZE ( 8L * 2) /* 8 KB in sectors */ -#define LVM_MAX_PE_SIZE ( 16L * 1024L * 1024L * 2) /* 16GB in sectors */ -#define LVM_DEFAULT_PE_SIZE ( 4096L * 2) /* 4 MB in sectors */ +#define LVM_LV_SIZE_MAX(a) ( ( long long) LVM_PE_T_MAX * (a)->pe_size > ( long long) 1024*1024/SECTOR_SIZE*1024*1024 ? ( long long) 1024*1024/SECTOR_SIZE*1024*1024 : ( long long) LVM_PE_T_MAX * (a)->pe_size) +#define LVM_MIN_PE_SIZE ( 8192L / SECTOR_SIZE) /* 8 KB in sectors */ +#define LVM_MAX_PE_SIZE ( 16L * 1024L * 1024L / SECTOR_SIZE * 1024) /* 16GB in sectors */ +#define LVM_DEFAULT_PE_SIZE ( 4096L * 1024 / SECTOR_SIZE) /* 4 MB in sectors */ #define LVM_DEFAULT_STRIPE_SIZE 16L /* 16 KB */ -#define LVM_MIN_STRIPE_SIZE ( PAGE_SIZE>>9) /* PAGESIZE in sectors */ -#define LVM_MAX_STRIPE_SIZE ( 512L * 2) /* 512 KB in sectors */ +#define LVM_MIN_STRIPE_SIZE ( PAGE_SIZE>>9) /* PAGESIZE in sectors */ +#define LVM_MAX_STRIPE_SIZE ( 512L * 1024 / SECTOR_SIZE) /* 512 KB in sectors */ #define LVM_MAX_STRIPES 128 /* max # of stripes */ -#define LVM_MAX_SIZE ( 1024LU * 1024 * 1024 * 2) /* 1TB[sectors] */ +#define LVM_MAX_SIZE ( 1024LU * 1024 / SECTOR_SIZE * 1024 * 1024) /* 1TB[sectors] */ #define LVM_MAX_MIRRORS 2 /* future use */ -#define LVM_MIN_READ_AHEAD 0 /* minimum read ahead sectors */ -#define LVM_MAX_READ_AHEAD 256 /* maximum read ahead sectors */ +#define LVM_MIN_READ_AHEAD 2 /* minimum read ahead sectors */ +#define LVM_MAX_READ_AHEAD 120 /* maximum read ahead sectors */ #define LVM_MAX_LV_IO_TIMEOUT 60 /* seconds I/O timeout (future use) */ #define LVM_PARTITION 0xfe /* LVM partition id */ #define LVM_NEW_PARTITION 0x8e /* new LVM partition id (10/09/1999) */ @@ -279,6 +312,64 @@ #define TRUE 1 +#define LVM_GET_COW_TABLE_CHUNKS_PER_PE(vg, lv) ( \ + vg->pe_size / lv->lv_chunk_size) + +#define LVM_GET_COW_TABLE_ENTRIES_PER_PE(vg, lv) ( \ +{ \ + int COW_table_entries_per_PE; \ + int 
COW_table_chunks_per_PE; \ +\ + COW_table_entries_per_PE = LVM_GET_COW_TABLE_CHUNKS_PER_PE(vg, lv); \ + COW_table_chunks_per_PE = ( COW_table_entries_per_PE * sizeof(lv_COW_table_disk_t) / SECTOR_SIZE + lv->lv_chunk_size - 1) / lv->lv_chunk_size; \ + COW_table_entries_per_PE - COW_table_chunks_per_PE;}) + + +/* to disk and to core data conversion macros */ +#if __BYTE_ORDER == __BIG_ENDIAN + +#define LVM_TO_CORE16(x) ( \ + ((uint16_t)((((uint16_t)(x) & 0x00FFU) << 8) | \ + (((uint16_t)(x) & 0xFF00U) >> 8)))) + +#define LVM_TO_DISK16(x) LVM_TO_CORE16(x) + +#define LVM_TO_CORE32(x) ( \ + ((uint32_t)((((uint32_t)(x) & 0x000000FFU) << 24) | \ + (((uint32_t)(x) & 0x0000FF00U) << 8))) \ + (((uint32_t)(x) & 0x00FF0000U) >> 8))) \ + (((uint32_t)(x) & 0xFF000000U) >> 24)))) + +#define LVM_TO_DISK32(x) LVM_TO_CORE32(x) + +#define LVM_TO_CORE64(x) \ + ((uint64_t)((((uint64_t)(x) & 0x00000000000000FFULL) << 56) | \ + (((uint64_t)(x) & 0x000000000000FF00ULL) << 40) | \ + (((uint64_t)(x) & 0x0000000000FF0000ULL) << 24) | \ + (((uint64_t)(x) & 0x00000000FF000000ULL) << 8) | \ + (((uint64_t)(x) & 0x000000FF00000000ULL) >> 8) | \ + (((uint64_t)(x) & 0x0000FF0000000000ULL) >> 24) | \ + (((uint64_t)(x) & 0x00FF000000000000ULL) >> 40) | \ + (((uint64_t)(x) & 0xFF00000000000000ULL) >> 56))) + +#define LVM_TO_DISK64(x) LVM_TO_CORE64(x) + +#elif __BYTE_ORDER == __LITTLE_ENDIAN + +#define LVM_TO_CORE16(x) x +#define LVM_TO_DISK16(x) x +#define LVM_TO_CORE32(x) x +#define LVM_TO_DISK32(x) x +#define LVM_TO_CORE64(x) x +#define LVM_TO_DISK64(x) x + +#else + +#error "__BYTE_ORDER must be defined as __LITTLE_ENDIAN or __BIG_ENDIAN" + +#endif /* #if __BYTE_ORDER == __BIG_ENDIAN */ + + /* * ioctls */ @@ -294,6 +385,7 @@ #define VG_STATUS_GET_NAMELIST _IOWR ( 0xfe, 0x07, 1) #define VG_SET_EXTENDABLE _IOW ( 0xfe, 0x08, 1) +#define VG_RENAME _IOW ( 0xfe, 0x09, 1) /* logical volume */ @@ -315,6 +407,14 @@ #define LE_REMAP _IOW ( 0xfe, 0x2b, 1) +#define LV_SNAPSHOT_USE_RATE _IOWR ( 0xfe, 0x2c, 1) + 
+#define LV_STATUS_BYDEV _IOWR ( 0xfe, 0x2e, 1) + +#define LV_RENAME _IOW ( 0xfe, 0x2f, 1) + +#define LV_BMAP _IOWR ( 0xfe, 0x30, 1) + /* physical volume */ #define PV_STATUS _IOWR ( 0xfe, 0x40, 1) @@ -347,6 +447,8 @@ #define VG_READ 0x01 /* vg_access */ #define VG_WRITE 0x02 /* " */ +#define VG_CLUSTERED 0x04 /* " */ +#define VG_SHARED 0x08 /* " */ /* logical volume */ #define LV_ACTIVE 0x01 /* lv_status */ @@ -376,34 +478,36 @@ */ #define NAME_LEN 128 /* don't change!!! */ -#define UUID_LEN 16 /* don't change!!! */ - -/* remap physical sector/rdev pairs */ -typedef struct -{ - struct list_head hash; - ulong rsector_org; - kdev_t rdev_org; - ulong rsector_new; - kdev_t rdev_new; -} lv_block_exception_t; - +#define UUID_LEN 32 /* don't change!!! */ + +/* copy on write tables in disk format */ +typedef struct { + uint64_t pv_org_number; + uint64_t pv_org_rsector; + uint64_t pv_snap_number; + uint64_t pv_snap_rsector; +} lv_COW_table_disk_v1_t; + +/* remap physical sector/rdev pairs including hash */ +typedef struct { + struct list_head hash; + ulong rsector_org; + kdev_t rdev_org; + ulong rsector_new; + kdev_t rdev_new; +} lv_block_exception_v1_t; /* disk stored pe information */ -typedef struct - { - uint16_t lv_num; - uint16_t le_num; - } -disk_pe_t; +typedef struct { + uint16_t lv_num; + uint16_t le_num; +} pe_disk_t; /* disk stored PV, VG, LV and PE size and offset information */ -typedef struct - { - uint32_t base; - uint32_t size; - } -lvm_disk_data_t; +typedef struct { + uint32_t base; + uint32_t size; +} lvm_disk_data_t; /* @@ -411,95 +515,104 @@ lvm_disk_data_t; */ /* core */ -typedef struct - { - uint8_t id[2]; /* Identifier */ - uint16_t version; /* HM lvm version */ - lvm_disk_data_t pv_on_disk; - lvm_disk_data_t vg_on_disk; - lvm_disk_data_t pv_namelist_on_disk; - lvm_disk_data_t lv_on_disk; - lvm_disk_data_t pe_on_disk; - uint8_t pv_name[NAME_LEN]; - uint8_t vg_name[NAME_LEN]; - uint8_t system_id[NAME_LEN]; /* for vgexport/vgimport */ - kdev_t pv_dev; 
- uint32_t pv_number; - uint32_t pv_status; - uint32_t pv_allocatable; - uint32_t pv_size; /* HM */ - uint32_t lv_cur; - uint32_t pe_size; - uint32_t pe_total; - uint32_t pe_allocated; - uint32_t pe_stale; /* for future use */ - - disk_pe_t *pe; /* HM */ - struct inode *inode; /* HM */ - } -pv_v1_t; +typedef struct { + char id[2]; /* Identifier */ + unsigned short version; /* HM lvm version */ + lvm_disk_data_t pv_on_disk; + lvm_disk_data_t vg_on_disk; + lvm_disk_data_t pv_namelist_on_disk; + lvm_disk_data_t lv_on_disk; + lvm_disk_data_t pe_on_disk; + char pv_name[NAME_LEN]; + char vg_name[NAME_LEN]; + char system_id[NAME_LEN]; /* for vgexport/vgimport */ + kdev_t pv_dev; + uint pv_number; + uint pv_status; + uint pv_allocatable; + uint pv_size; /* HM */ + uint lv_cur; + uint pe_size; + uint pe_total; + uint pe_allocated; + uint pe_stale; /* for future use */ + pe_disk_t *pe; /* HM */ + struct inode *inode; /* HM */ +} pv_v1_t; -/* disk */ -typedef struct - { - uint8_t id[2]; /* Identifier */ - uint16_t version; /* HM lvm version */ - lvm_disk_data_t pv_on_disk; - lvm_disk_data_t vg_on_disk; - lvm_disk_data_t pv_namelist_on_disk; - lvm_disk_data_t lv_on_disk; - lvm_disk_data_t pe_on_disk; - uint8_t pv_name[NAME_LEN]; - uint8_t vg_name[NAME_LEN]; - uint8_t system_id[NAME_LEN]; /* for vgexport/vgimport */ - uint32_t pv_major; - uint32_t pv_number; - uint32_t pv_status; - uint32_t pv_allocatable; - uint32_t pv_size; /* HM */ - uint32_t lv_cur; - uint32_t pe_size; - uint32_t pe_total; - uint32_t pe_allocated; - } -pv_disk_v1_t; +/* core */ +typedef struct { + char id[2]; /* Identifier */ + unsigned short version; /* HM lvm version */ + lvm_disk_data_t pv_on_disk; + lvm_disk_data_t vg_on_disk; + lvm_disk_data_t pv_uuidlist_on_disk; + lvm_disk_data_t lv_on_disk; + lvm_disk_data_t pe_on_disk; + char pv_name[NAME_LEN]; + char vg_name[NAME_LEN]; + char system_id[NAME_LEN]; /* for vgexport/vgimport */ + kdev_t pv_dev; + uint pv_number; + uint pv_status; + uint 
pv_allocatable; + uint pv_size; /* HM */ + uint lv_cur; + uint pe_size; + uint pe_total; + uint pe_allocated; + uint pe_stale; /* for future use */ + pe_disk_t *pe; /* HM */ + struct inode *inode; /* HM */ + char pv_uuid[UUID_LEN+1]; +} pv_v2_t; -/* - * Structure Physical Volume (PV) Version 2 (future!) - */ +/* disk */ +typedef struct { + uint8_t id[2]; /* Identifier */ + uint16_t version; /* HM lvm version */ + lvm_disk_data_t pv_on_disk; + lvm_disk_data_t vg_on_disk; + lvm_disk_data_t pv_namelist_on_disk; + lvm_disk_data_t lv_on_disk; + lvm_disk_data_t pe_on_disk; + uint8_t pv_name[NAME_LEN]; + uint8_t vg_name[NAME_LEN]; + uint8_t system_id[NAME_LEN]; /* for vgexport/vgimport */ + uint32_t pv_major; + uint32_t pv_number; + uint32_t pv_status; + uint32_t pv_allocatable; + uint32_t pv_size; /* HM */ + uint32_t lv_cur; + uint32_t pe_size; + uint32_t pe_total; + uint32_t pe_allocated; +} pv_disk_v1_t; -typedef struct - { - uint8_t id[2]; /* Identifier */ - uint16_t version; /* HM lvm version */ - lvm_disk_data_t pv_on_disk; - lvm_disk_data_t vg_on_disk; - lvm_disk_data_t pv_uuid_on_disk; - lvm_disk_data_t lv_on_disk; - lvm_disk_data_t pe_on_disk; - uint8_t pv_name[NAME_LEN]; - uint8_t vg_name[NAME_LEN]; - uint8_t system_id[NAME_LEN]; /* for vgexport/vgimport */ - kdev_t pv_dev; - uint32_t pv_number; - uint32_t pv_status; - uint32_t pv_allocatable; - uint32_t pv_size; /* HM */ - uint32_t lv_cur; - uint32_t pe_size; - uint32_t pe_total; - uint32_t pe_allocated; - uint32_t pe_stale; /* for future use */ - disk_pe_t *pe; /* HM */ - struct inode *inode; /* HM */ - /* delta to version 1 starts here */ - uint8_t pv_uuid[UUID_LEN]; - uint32_t pv_atime; /* PV access time */ - uint32_t pv_ctime; /* PV creation time */ - uint32_t pv_mtime; /* PV modification time */ - } -pv_v2_t; +/* disk */ +typedef struct { + uint8_t id[2]; /* Identifier */ + uint16_t version; /* HM lvm version */ + lvm_disk_data_t pv_on_disk; + lvm_disk_data_t vg_on_disk; + lvm_disk_data_t 
pv_uuidlist_on_disk; + lvm_disk_data_t lv_on_disk; + lvm_disk_data_t pe_on_disk; + uint8_t pv_uuid[NAME_LEN]; + uint8_t vg_name[NAME_LEN]; + uint8_t system_id[NAME_LEN]; /* for vgexport/vgimport */ + uint32_t pv_major; + uint32_t pv_number; + uint32_t pv_status; + uint32_t pv_allocatable; + uint32_t pv_size; /* HM */ + uint32_t lv_cur; + uint32_t pe_size; + uint32_t pe_total; + uint32_t pe_allocated; +} pv_disk_v2_t; /* @@ -507,325 +620,256 @@ pv_v2_t; */ /* core PE information */ -typedef struct - { - kdev_t dev; - uint32_t pe; /* to be changed if > 2TB */ - uint32_t reads; - uint32_t writes; - } -pe_t; - -typedef struct - { - uint8_t lv_name[NAME_LEN]; - kdev_t old_dev; - kdev_t new_dev; - ulong old_pe; - ulong new_pe; - } -le_remap_req_t; - - - -/* - * Structure Logical Volume (LV) Version 1 - */ - -/* disk */ -typedef struct - { - uint8_t lv_name[NAME_LEN]; - uint8_t vg_name[NAME_LEN]; - uint32_t lv_access; - uint32_t lv_status; - uint32_t lv_open; /* HM */ - uint32_t lv_dev; /* HM */ - uint32_t lv_number; /* HM */ - uint32_t lv_mirror_copies; /* for future use */ - uint32_t lv_recovery; /* " */ - uint32_t lv_schedule; /* " */ - uint32_t lv_size; - uint32_t dummy; - uint32_t lv_current_le; /* for future use */ - uint32_t lv_allocated_le; - uint32_t lv_stripes; - uint32_t lv_stripesize; - uint32_t lv_badblock; /* for future use */ - uint32_t lv_allocation; - uint32_t lv_io_timeout; /* for future use */ - uint32_t lv_read_ahead; /* HM, for future use */ - } -lv_disk_v1_t; - +typedef struct { + kdev_t dev; + ulong pe; /* to be changed if > 2TB */ + ulong reads; + ulong writes; +} pe_t; + +typedef struct { + char lv_name[NAME_LEN]; + kdev_t old_dev; + kdev_t new_dev; + ulong old_pe; + ulong new_pe; +} le_remap_req_t; + +typedef struct lv_bmap { + ulong lv_block; + dev_t lv_dev; +} lv_bmap_t; /* - * Structure Logical Volume (LV) Version 2 + * Structure Logical Volume (LV) Version 3 */ /* core */ -typedef struct lv_v2 - { - uint8_t lv_name[NAME_LEN]; - uint8_t 
vg_name[NAME_LEN]; - uint32_t lv_access; - uint32_t lv_status; - uint32_t lv_open; /* HM */ - kdev_t lv_dev; /* HM */ - uint32_t lv_number; /* HM */ - uint32_t lv_mirror_copies; /* for future use */ - uint32_t lv_recovery; /* " */ - uint32_t lv_schedule; /* " */ - uint32_t lv_size; - pe_t *lv_current_pe; /* HM */ - uint32_t lv_current_le; /* for future use */ - uint32_t lv_allocated_le; - uint32_t lv_stripes; - uint32_t lv_stripesize; - uint32_t lv_badblock; /* for future use */ - uint32_t lv_allocation; - uint32_t lv_io_timeout; /* for future use */ - uint32_t lv_read_ahead; - - /* delta to version 1 starts here */ - struct lv_v2 *lv_snapshot_org; - struct lv_v2 *lv_snapshot_prev; - struct lv_v2 *lv_snapshot_next; - lv_block_exception_t *lv_block_exception; - uint8_t __unused; - uint32_t lv_remap_ptr; - uint32_t lv_remap_end; - uint32_t lv_chunk_size; - uint32_t lv_snapshot_minor; - struct kiobuf * lv_iobuf; - struct semaphore lv_snapshot_sem; - struct list_head * lv_snapshot_hash_table; - unsigned long lv_snapshot_hash_mask; -} lv_v2_t; +typedef struct lv_v4 { + char lv_name[NAME_LEN]; + char vg_name[NAME_LEN]; + uint lv_access; + uint lv_status; + uint lv_open; /* HM */ + kdev_t lv_dev; /* HM */ + uint lv_number; /* HM */ + uint lv_mirror_copies; /* for future use */ + uint lv_recovery; /* " */ + uint lv_schedule; /* " */ + uint lv_size; + pe_t *lv_current_pe; /* HM */ + uint lv_current_le; /* for future use */ + uint lv_allocated_le; + uint lv_stripes; + uint lv_stripesize; + uint lv_badblock; /* for future use */ + uint lv_allocation; + uint lv_io_timeout; /* for future use */ + uint lv_read_ahead; + + /* delta to version 1 starts here */ + struct lv_v4 *lv_snapshot_org; + struct lv_v4 *lv_snapshot_prev; + struct lv_v4 *lv_snapshot_next; + lv_block_exception_t *lv_block_exception; + uint lv_remap_ptr; + uint lv_remap_end; + uint lv_chunk_size; + uint lv_snapshot_minor; +#ifdef __KERNEL__ + struct kiobuf *lv_iobuf; + struct semaphore lv_snapshot_sem; + struct 
list_head *lv_snapshot_hash_table; + ulong lv_snapshot_hash_table_size; + ulong lv_snapshot_hash_mask; + struct page *lv_COW_table_page; + wait_queue_head_t lv_snapshot_wait; + int lv_snapshot_use_rate; + void *vg; +#else + char dummy[200]; +#endif +} lv_v4_t; /* disk */ -typedef struct - { - uint8_t lv_name[NAME_LEN]; - uint8_t vg_name[NAME_LEN]; - uint32_t lv_access; - uint32_t lv_status; - uint32_t lv_open; /* HM */ - uint32_t lv_dev; /* HM */ - uint32_t lv_number; /* HM */ - uint32_t lv_mirror_copies; /* for future use */ - uint32_t lv_recovery; /* " */ - uint32_t lv_schedule; /* " */ - uint32_t lv_size; - uint32_t dummy; - uint32_t lv_current_le; /* for future use */ - uint32_t lv_allocated_le; - uint32_t lv_stripes; - uint32_t lv_stripesize; - uint32_t lv_badblock; /* for future use */ - uint32_t lv_allocation; - uint32_t lv_io_timeout; /* for future use */ - uint32_t lv_read_ahead; /* HM, for future use */ - } -lv_disk_v2_t; - +typedef struct { + uint8_t lv_name[NAME_LEN]; + uint8_t vg_name[NAME_LEN]; + uint32_t lv_access; + uint32_t lv_status; + uint32_t lv_open; /* HM */ + uint32_t lv_dev; /* HM */ + uint32_t lv_number; /* HM */ + uint32_t lv_mirror_copies; /* for future use */ + uint32_t lv_recovery; /* " */ + uint32_t lv_schedule; /* " */ + uint32_t lv_size; + uint32_t lv_snapshot_minor;/* minor number of original */ + uint16_t lv_chunk_size; /* chunk size of snapshot */ + uint16_t dummy; + uint32_t lv_allocated_le; + uint32_t lv_stripes; + uint32_t lv_stripesize; + uint32_t lv_badblock; /* for future use */ + uint32_t lv_allocation; + uint32_t lv_io_timeout; /* for future use */ + uint32_t lv_read_ahead; /* HM */ +} lv_disk_v3_t; /* * Structure Volume Group (VG) Version 1 */ -typedef struct - { - uint8_t vg_name[NAME_LEN]; /* volume group name */ - uint32_t vg_number; /* volume group number */ - uint32_t vg_access; /* read/write */ - uint32_t vg_status; /* active or not */ - uint32_t lv_max; /* maximum logical volumes */ - uint32_t lv_cur; /* current 
logical volumes */ - uint32_t lv_open; /* open logical volumes */ - uint32_t pv_max; /* maximum physical volumes */ - uint32_t pv_cur; /* current physical volumes FU */ - uint32_t pv_act; /* active physical volumes */ - uint32_t dummy; /* was obsolete max_pe_per_pv */ - uint32_t vgda; /* volume group descriptor arrays FU */ - uint32_t pe_size; /* physical extent size in sectors */ - uint32_t pe_total; /* total of physical extents */ - uint32_t pe_allocated; /* allocated physical extents */ - uint32_t pvg_total; /* physical volume groups FU */ - struct proc_dir_entry *proc; - pv_t *pv[ABS_MAX_PV + 1]; /* physical volume struct pointers */ - lv_t *lv[ABS_MAX_LV + 1]; /* logical volume struct pointers */ - } -vg_v1_t; - -typedef struct - { - uint8_t vg_name[NAME_LEN]; /* volume group name */ - uint32_t vg_number; /* volume group number */ - uint32_t vg_access; /* read/write */ - uint32_t vg_status; /* active or not */ - uint32_t lv_max; /* maximum logical volumes */ - uint32_t lv_cur; /* current logical volumes */ - uint32_t lv_open; /* open logical volumes */ - uint32_t pv_max; /* maximum physical volumes */ - uint32_t pv_cur; /* current physical volumes FU */ - uint32_t pv_act; /* active physical volumes */ - uint32_t dummy; - uint32_t vgda; /* volume group descriptor arrays FU */ - uint32_t pe_size; /* physical extent size in sectors */ - uint32_t pe_total; /* total of physical extents */ - uint32_t pe_allocated; /* allocated physical extents */ - uint32_t pvg_total; /* physical volume groups FU */ - } -vg_disk_v1_t; - -/* - * Structure Volume Group (VG) Version 2 - */ - -typedef struct - { - uint8_t vg_name[NAME_LEN]; /* volume group name */ - uint32_t vg_number; /* volume group number */ - uint32_t vg_access; /* read/write */ - uint32_t vg_status; /* active or not */ - uint32_t lv_max; /* maximum logical volumes */ - uint32_t lv_cur; /* current logical volumes */ - uint32_t lv_open; /* open logical volumes */ - uint32_t pv_max; /* maximum physical volumes */ - 
uint32_t pv_cur; /* current physical volumes FU */ - uint32_t pv_act; /* future: active physical volumes */ - uint32_t max_pe_per_pv; /* OBSOLETE maximum PE/PV */ - uint32_t vgda; /* volume group descriptor arrays FU */ - uint32_t pe_size; /* physical extent size in sectors */ - uint32_t pe_total; /* total of physical extents */ - uint32_t pe_allocated; /* allocated physical extents */ - uint32_t pvg_total; /* physical volume groups FU */ - struct proc_dir_entry *proc; - pv_t *pv[ABS_MAX_PV + 1]; /* physical volume struct pointers */ - lv_t *lv[ABS_MAX_LV + 1]; /* logical volume struct pointers */ - /* delta to version 1 starts here */ - uint8_t vg_uuid[UUID_LEN]; /* volume group UUID */ - time_t vg_atime; /* VG access time */ - time_t vg_ctime; /* VG creation time */ - time_t vg_mtime; /* VG modification time */ - } -vg_v2_t; - +/* core */ +typedef struct { + char vg_name[NAME_LEN]; /* volume group name */ + uint vg_number; /* volume group number */ + uint vg_access; /* read/write */ + uint vg_status; /* active or not */ + uint lv_max; /* maximum logical volumes */ + uint lv_cur; /* current logical volumes */ + uint lv_open; /* open logical volumes */ + uint pv_max; /* maximum physical volumes */ + uint pv_cur; /* current physical volumes FU */ + uint pv_act; /* active physical volumes */ + uint dummy; /* was obsolete max_pe_per_pv */ + uint vgda; /* volume group descriptor arrays FU */ + uint pe_size; /* physical extent size in sectors */ + uint pe_total; /* total of physical extents */ + uint pe_allocated; /* allocated physical extents */ + uint pvg_total; /* physical volume groups FU */ + struct proc_dir_entry *proc; + pv_t *pv[ABS_MAX_PV + 1]; /* physical volume struct pointers */ + lv_t *lv[ABS_MAX_LV + 1]; /* logical volume struct pointers */ +} vg_v1_t; + +typedef struct { + char vg_name[NAME_LEN]; /* volume group name */ + uint vg_number; /* volume group number */ + uint vg_access; /* read/write */ + uint vg_status; /* active or not */ + uint lv_max; /* 
maximum logical volumes */ + uint lv_cur; /* current logical volumes */ + uint lv_open; /* open logical volumes */ + uint pv_max; /* maximum physical volumes */ + uint pv_cur; /* current physical volumes FU */ + uint pv_act; /* active physical volumes */ + uint dummy; /* was obsolete max_pe_per_pv */ + uint vgda; /* volume group descriptor arrays FU */ + uint pe_size; /* physical extent size in sectors */ + uint pe_total; /* total of physical extents */ + uint pe_allocated; /* allocated physical extents */ + uint pvg_total; /* physical volume groups FU */ + struct proc_dir_entry *proc; + pv_t *pv[ABS_MAX_PV + 1]; /* physical volume struct pointers */ + lv_t *lv[ABS_MAX_LV + 1]; /* logical volume struct pointers */ + char vg_uuid[UUID_LEN+1]; /* volume group UUID */ +#ifdef __KERNEL__ + struct proc_dir_entry *vg_dir_pde; + struct proc_dir_entry *lv_subdir_pde; + struct proc_dir_entry *pv_subdir_pde; +#else + char dummy1[200]; +#endif +} vg_v3_t; -/* - * Timekeeping structure on disk (0.7 feature) - * - * Holds several timestamps for start/stop time of non - * atomic VGDA disk i/o operations - * - */ -typedef struct - { - uint32_t seconds; /* seconds since the epoch */ - uint32_t jiffies; /* micro timer */ - } -lvm_time_t; - -#define TIMESTAMP_ID_SIZE 2 -typedef struct - { - uint8_t id[TIMESTAMP_ID_SIZE]; /* Identifier */ - lvm_time_t pv_vg_lv_pe_io_begin; - lvm_time_t pv_vg_lv_pe_io_end; - lvm_time_t pv_io_begin; - lvm_time_t pv_io_end; - lvm_time_t vg_io_begin; - lvm_time_t vg_io_end; - lvm_time_t lv_io_begin; - lvm_time_t lv_io_end; - lvm_time_t pe_io_begin; - lvm_time_t pe_io_end; - lvm_time_t pe_move_io_begin; - lvm_time_t pe_move_io_end; - uint8_t dummy[LVM_TIMESTAMP_DISK_SIZE - - TIMESTAMP_ID_SIZE - - 12 * sizeof (lvm_time_t)]; - /* ATTENTION ^^ */ - } -timestamp_disk_t; - -/* same on disk and in core so far */ -typedef timestamp_disk_t timestamp_t; - -/* function identifiers for timestamp actions */ -typedef enum - { - PV_VG_LV_PE_IO_BEGIN, - 
PV_VG_LV_PE_IO_END, - PV_IO_BEGIN, - PV_IO_END, - VG_IO_BEGIN, - VG_IO_END, - LV_IO_BEGIN, - LV_IO_END, - PE_IO_BEGIN, - PE_IO_END, - PE_MOVE_IO_BEGIN, - PE_MOVE_IO_END - } -ts_fct_id_t; +/* disk */ +typedef struct { + uint8_t vg_name[NAME_LEN]; /* volume group name */ + uint32_t vg_number; /* volume group number */ + uint32_t vg_access; /* read/write */ + uint32_t vg_status; /* active or not */ + uint32_t lv_max; /* maximum logical volumes */ + uint32_t lv_cur; /* current logical volumes */ + uint32_t lv_open; /* open logical volumes */ + uint32_t pv_max; /* maximum physical volumes */ + uint32_t pv_cur; /* current physical volumes FU */ + uint32_t pv_act; /* active physical volumes */ + uint32_t dummy; + uint32_t vgda; /* volume group descriptor arrays FU */ + uint32_t pe_size; /* physical extent size in sectors */ + uint32_t pe_total; /* total of physical extents */ + uint32_t pe_allocated; /* allocated physical extents */ + uint32_t pvg_total; /* physical volume groups FU */ +} vg_disk_v1_t; + +typedef struct { + uint8_t vg_uuid[UUID_LEN]; /* volume group UUID */ + uint8_t vg_name_dummy[NAME_LEN-UUID_LEN]; /* rest of v1 VG name */ + uint32_t vg_number; /* volume group number */ + uint32_t vg_access; /* read/write */ + uint32_t vg_status; /* active or not */ + uint32_t lv_max; /* maximum logical volumes */ + uint32_t lv_cur; /* current logical volumes */ + uint32_t lv_open; /* open logical volumes */ + uint32_t pv_max; /* maximum physical volumes */ + uint32_t pv_cur; /* current physical volumes FU */ + uint32_t pv_act; /* active physical volumes */ + uint32_t dummy; + uint32_t vgda; /* volume group descriptor arrays FU */ + uint32_t pe_size; /* physical extent size in sectors */ + uint32_t pe_total; /* total of physical extents */ + uint32_t pe_allocated; /* allocated physical extents */ + uint32_t pvg_total; /* physical volume groups FU */ +} vg_disk_v2_t; /* * Request structures for ioctls */ -/* Request structure PV_STATUS */ -typedef struct - { - char 
pv_name[NAME_LEN]; - pv_t *pv; - } -pv_status_req_t, pv_change_req_t; +/* Request structure PV_STATUS_BY_NAME... */ +typedef struct { + char pv_name[NAME_LEN]; + pv_t *pv; +} pv_status_req_t, pv_change_req_t; /* Request structure PV_FLUSH */ -typedef struct - { - char pv_name[NAME_LEN]; - } -pv_flush_req_t; +typedef struct { + char pv_name[NAME_LEN]; + kdev_t pv_dev; +} pv_flush_req_t; /* Request structure PE_MOVE */ -typedef struct - { - enum - { - LOCK_PE, UNLOCK_PE - } - lock; - struct - { - kdev_t lv_dev; - kdev_t pv_dev; - uint32_t pv_offset; - } - data; - } -pe_lock_req_t; +typedef struct { + enum { + LOCK_PE, UNLOCK_PE + } lock; + struct { + kdev_t lv_dev; + kdev_t pv_dev; + ulong pv_offset; + } data; +} pe_lock_req_t; /* Request structure LV_STATUS_BYNAME */ -typedef struct - { - char lv_name[NAME_LEN]; - lv_t *lv; - } -lv_status_byname_req_t, lv_req_t; +typedef struct { + char lv_name[NAME_LEN]; + lv_t *lv; +} lv_status_byname_req_t, lv_req_t; /* Request structure LV_STATUS_BYINDEX */ -typedef struct - { - ulong lv_index; - lv_t *lv; - } -lv_status_byindex_req_t; - -#endif /* #ifndef _LVM_H_INCLUDE */ +typedef struct { + ulong lv_index; + lv_t *lv; + /* Transfer size because user space and kernel space differ */ + ushort size; +} lv_status_byindex_req_t; + +/* Request structure LV_STATUS_BYDEV... 
*/ +typedef struct { + dev_t dev; + pv_t *lv; +} lv_status_bydev_req_t; + + +/* Request structure LV_SNAPSHOT_USE_RATE */ +typedef struct { + int block; + int rate; +} lv_snapshot_use_rate_req_t; + +#endif /* #ifndef _LVM_H_INCLUDE */ diff --git a/include/linux/mm.h b/include/linux/mm.h index 850de4dbaf94..8cffeb8a90a9 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -413,7 +413,6 @@ extern void swapin_readahead(swp_entry_t); /* mmap.c */ extern void lock_vma_mappings(struct vm_area_struct *); extern void unlock_vma_mappings(struct vm_area_struct *); -extern void merge_segments(struct mm_struct *, unsigned long, unsigned long); extern void insert_vm_struct(struct mm_struct *, struct vm_area_struct *); extern void __insert_vm_struct(struct mm_struct *, struct vm_area_struct *); extern void build_mmap_avl(struct mm_struct *); diff --git a/include/linux/raid/md_k.h b/include/linux/raid/md_k.h index 8c260c63d598..b98eb998e4f3 100644 --- a/include/linux/raid/md_k.h +++ b/include/linux/raid/md_k.h @@ -350,6 +350,7 @@ do { \ if (condition) \ break; \ spin_unlock_irq(&lock); \ + run_task_queue(&tq_disk); \ schedule(); \ spin_lock_irq(&lock); \ } \ diff --git a/include/linux/raid/raid5.h b/include/linux/raid/raid5.h index dd5ad01ae269..d46dbd51264e 100644 --- a/include/linux/raid/raid5.h +++ b/include/linux/raid/raid5.h @@ -4,72 +4,167 @@ #include #include -struct disk_info { - kdev_t dev; - int operational; - int number; - int raid_disk; - int write_only; - int spare; - int used_slot; -}; - +/* + * + * Each stripe contains one buffer per disc. Each buffer can be in + * one of a number of states determined by bh_state. Changes between + * these states happen *almost* exclusively under a per-stripe + * spinlock. Some very specific changes can happen in b_end_io, and + * these are not protected by the spin lock. 
+ * + * The bh_state bits that are used to represent these states are: + * BH_Uptodate, BH_Lock + * + * State Empty == !Uptodate, !Lock + * We have no data, and there is no active request + * State Want == !Uptodate, Lock + * A read request is being submitted for this block + * State Dirty == Uptodate, Lock + * Some new data is in this buffer, and it is being written out + * State Clean == Uptodate, !Lock + * We have valid data which is the same as on disc + * + * The possible state transitions are: + * + * Empty -> Want - on read or write to get old data for parity calc + * Empty -> Dirty - on compute_parity to satisfy write/sync request.(RECONSTRUCT_WRITE) + * Empty -> Clean - on compute_block when computing a block for failed drive + * Want -> Empty - on failed read + * Want -> Clean - on successful completion of read request + * Dirty -> Clean - on successful completion of write request + * Dirty -> Clean - on failed write + * Clean -> Dirty - on compute_parity to satisfy write/sync (RECONSTRUCT or RMW) + * + * The Want->Empty, Want->Clean, Dirty->Clean, transitions + * all happen in b_end_io at interrupt time. + * Each sets the Uptodate bit before releasing the Lock bit. + * This leaves one multi-stage transition: + * Want->Dirty->Clean + * This is safe because thinking that a Clean buffer is actually dirty + * will at worst delay some action, and the stripe will be scheduled + * for attention after the transition is complete. + * + * There is one possibility that is not covered by these states. That + * is if one drive has failed and there is a spare being rebuilt. We + * can't distinguish between a clean block that has been generated + * from parity calculations, and a clean block that has been + * successfully written to the spare ( or to parity when resyncing). + * To distinguish these states we have a stripe bit STRIPE_INSYNC that + * is set whenever a write is scheduled to the spare, or to the parity + * disc if there is no spare.
A sync request clears this bit, and + * when we find it set with no buffers locked, we know the sync is + * complete. + * + * Buffers for the md device that arrive via make_request are attached + * to the appropriate stripe in one of two lists linked on b_reqnext. + * One list for read requests, one for write. There should never be + * more than one buffer on the two lists together, but we are not + * guaranteed of that so we allow for more. + * + * If a buffer is on the read list when the associated cache buffer is + * Uptodate, the data is copied into the read buffer and its b_end_io + * routine is called. This may happen in the end_request routine only + * if the buffer has just successfully been read. end_request should + * remove the buffers from the list and then set the Uptodate bit on + * the buffer. Other threads may do this only if they first check + * that the Uptodate bit is set. Once they have checked that they may + * take buffers off the read queue. + * + * When a buffer on the write_list is committed for write, it is + * marked clean, copied into the cache buffer, which is then marked + * dirty, and moved onto a third list, the written list. Once both + * the parity block and the cached buffer are successfully written, + * any buffer on a written list can be returned with b_end_io. + * + * The write_list and read_list lists act as fifos. They are protected by the + * device_lock which can be claimed when a stripe_lock is held. + * The device_lock is only for list manipulations and will only be held for a very + * short time. It can be claimed from interrupts. + * + * + * Stripes in the stripe cache can be on one of two lists (or on + * neither). The "inactive_list" contains stripes which are not + * currently being used for any request. They can freely be reused + * for another stripe. The "handle_list" contains stripes that need + * to be handled in some way. Both of these are fifo queues.
Each + * stripe is also (potentially) linked to a hash bucket in the hash + * table so that it can be found by sector number. Stripes that are + * not hashed must be on the inactive_list, and will normally be at + * the front. All stripes start life this way. + * + * The inactive_list, handle_list and hash bucket lists are all protected by the + * device_lock. + * - stripes on the inactive_list never have their stripe_lock held. + * - stripes have a reference counter. If count==0, they are on a list. + * - If a stripe might need handling, STRIPE_HANDLE is set. + * - When refcount reaches zero, then if STRIPE_HANDLE it is put on + * handle_list else inactive_list + * + * This, combined with the fact that STRIPE_HANDLE is only ever + * cleared while a stripe has a non-zero count means that if the + * refcount is 0 and STRIPE_HANDLE is set, then it is on the + * handle_list and if refcount is 0 and STRIPE_HANDLE is not set, then + * the stripe is on inactive_list. + * + * The possible transitions are: + * activate an unhashed/inactive stripe (get_active_stripe()) + * lockdev check-hash unlink-stripe cnt++ clean-stripe hash-stripe unlockdev + * activate a hashed, possibly active stripe (get_active_stripe()) + * lockdev check-hash if(!cnt++)unlink-stripe unlockdev + * attach a request to an active stripe (add_stripe_bh()) + * lockdev attach-buffer unlockdev + * handle a stripe (handle_stripe()) + * lockstripe clrSTRIPE_HANDLE ... (lockdev check-buffers unlockdev) .. change-state .. record io needed unlockstripe schedule io + * release an active stripe (release_stripe()) + * lockdev if (!--cnt) { if STRIPE_HANDLE, add to handle_list else add to inactive-list } unlockdev + * + * The refcount counts each thread that has activated the stripe, + * plus raid5d if it is handling it, plus one for each active request + * on a cached buffer.
+ */ struct stripe_head { - md_spinlock_t stripe_lock; struct stripe_head *hash_next, **hash_pprev; /* hash pointers */ - struct stripe_head *free_next; /* pool of free sh's */ - struct buffer_head *buffer_pool; /* pool of free buffers */ - struct buffer_head *bh_pool; /* pool of free bh's */ + struct list_head lru; /* inactive_list or handle_list */ struct raid5_private_data *raid_conf; - struct buffer_head *bh_old[MD_SB_DISKS]; /* disk image */ - struct buffer_head *bh_new[MD_SB_DISKS]; /* buffers of the MD device (present in buffer cache) */ - struct buffer_head *bh_copy[MD_SB_DISKS]; /* copy on write of bh_new (bh_new can change from under us) */ - struct buffer_head *bh_req[MD_SB_DISKS]; /* copy of bh_new (only the buffer heads), queued to the lower levels */ - int cmd_new[MD_SB_DISKS]; /* READ/WRITE for new */ - int new[MD_SB_DISKS]; /* buffer added since the last handle_stripe() */ + struct buffer_head *bh_cache[MD_SB_DISKS]; /* buffered copy */ + struct buffer_head *bh_read[MD_SB_DISKS]; /* read request buffers of the MD device */ + struct buffer_head *bh_write[MD_SB_DISKS]; /* write request buffers of the MD device */ + struct buffer_head *bh_written[MD_SB_DISKS]; /* write request buffers of the MD device that have been scheduled for write */ unsigned long sector; /* sector of this row */ int size; /* buffers size */ int pd_idx; /* parity disk index */ - atomic_t nr_pending; /* nr of pending cmds */ unsigned long state; /* state flags */ - int cmd; /* stripe cmd */ - atomic_t count; /* nr of waiters */ - int write_method; /* reconstruct-write / read-modify-write */ - int phase; /* PHASE_BEGIN, ..., PHASE_COMPLETE */ - md_wait_queue_head_t wait; /* processes waiting for this stripe */ - + atomic_t count; /* nr of active thread/requests */ + spinlock_t lock; int sync_redone; }; -/* - * Phase - */ -#define PHASE_BEGIN 0 -#define PHASE_READ_OLD 1 -#define PHASE_WRITE 2 -#define PHASE_READ 3 -#define PHASE_COMPLETE 4 /* * Write method */ -#define METHOD_NONE 0 
#define RECONSTRUCT_WRITE 1 #define READ_MODIFY_WRITE 2 +/* not a write method, but a compute_parity mode */ +#define CHECK_PARITY 3 /* * Stripe state */ -#define STRIPE_LOCKED 0 #define STRIPE_ERROR 1 +#define STRIPE_HANDLE 2 +#define STRIPE_SYNCING 3 +#define STRIPE_INSYNC 4 -/* - * Stripe commands - */ -#define STRIPE_NONE 0 -#define STRIPE_WRITE 1 -#define STRIPE_READ 2 -#define STRIPE_SYNC 3 +struct disk_info { + kdev_t dev; + int operational; + int number; + int raid_disk; + int write_only; + int spare; + int used_slot; +}; struct raid5_private_data { struct stripe_head **stripe_hashtbl; @@ -80,23 +175,15 @@ struct raid5_private_data { int buffer_size; int chunk_size, level, algorithm; int raid_disks, working_disks, failed_disks; - unsigned long next_sector; - atomic_t nr_handle; - struct stripe_head *next_free_stripe; - atomic_t nr_stripes; int resync_parity; int max_nr_stripes; - int clock; - atomic_t nr_hashed_stripes; - atomic_t nr_locked_stripes; - atomic_t nr_pending_stripes; - atomic_t nr_cached_stripes; + struct list_head handle_list; /* stripes needing handling */ /* * Free stripes pool */ - atomic_t nr_free_sh; - struct stripe_head *free_sh_list; + atomic_t active_stripes; + struct list_head inactive_list; md_wait_queue_head_t wait_for_stripe; md_spinlock_t device_lock; diff --git a/ipc/shm.c b/ipc/shm.c index 76fb5556135a..5d4ee22a6da0 100644 --- a/ipc/shm.c +++ b/ipc/shm.c @@ -15,23 +15,13 @@ * */ -#include -#include #include #include -#include -#include #include -#include #include #include -#include -#include #include -#include - #include -#include #include "util.h" @@ -109,6 +99,7 @@ static inline void shm_inc (int id) { BUG(); shp->shm_atim = CURRENT_TIME; shp->shm_lprid = current->pid; + shp->shm_nattch++; shm_unlock(id); } @@ -123,21 +114,14 @@ static void shm_open (struct vm_area_struct *shmd) * * @shp: struct to free * - * It has to be called with shp and shm_ids.sem locked and will - * release them + * It has to be called with shp and 
shm_ids.sem locked */ static void shm_destroy (struct shmid_kernel *shp) { - struct file * file = shp->shm_file; - - shp->shm_file = NULL; shm_tot -= (shp->shm_segsz + PAGE_SIZE - 1) >> PAGE_SHIFT; - shm_unlock (shp->id); shm_rmid (shp->id); + fput (shp->shm_file); kfree (shp); - up (&shm_ids.sem); - /* put the file outside the critical path to prevent recursion */ - fput (file); } /* @@ -158,10 +142,10 @@ static void shm_close (struct vm_area_struct *shmd) BUG(); shp->shm_lprid = current->pid; shp->shm_dtim = CURRENT_TIME; - if(shp->shm_flags & SHM_DEST && - file_count (file) == 2) /* shp and the vma have the last - references*/ - return shm_destroy (shp); + shp->shm_nattch--; + if(shp->shm_nattch == 0 && + shp->shm_flags & SHM_DEST) + shm_destroy (shp); shm_unlock(id); up (&shm_ids.sem); @@ -176,7 +160,7 @@ static int shm_mmap(struct file * file, struct vm_area_struct * vma) } static struct file_operations shm_file_operations = { - mmap: shm_mmap + mmap: shm_mmap }; static struct vm_operations_struct shm_vm_ops = { @@ -218,9 +202,10 @@ static int newseg (key_t key, int shmflg, size_t size) shp->shm_atim = shp->shm_dtim = 0; shp->shm_ctim = CURRENT_TIME; shp->shm_segsz = size; + shp->shm_nattch = 0; shp->id = shm_buildid(id,shp->shm_perm.seq); shp->shm_file = file; - file->f_dentry->d_inode->i_ino = id; + file->f_dentry->d_inode->i_ino = shp->id; file->f_op = &shm_file_operations; shm_tot += numpages; shm_unlock (id); @@ -370,15 +355,13 @@ static void shm_get_stat (unsigned long *rss, unsigned long *swp) struct inode * inode; shp = shm_get(i); - if(shp == NULL || shp->shm_file == NULL) + if(shp == NULL) continue; inode = shp->shm_file->f_dentry->d_inode; - down (&inode->i_sem); - *rss += inode->i_mapping->nrpages; spin_lock (&inode->u.shmem_i.lock); + *rss += inode->i_mapping->nrpages; *swp += inode->u.shmem_i.swapped; spin_unlock (&inode->u.shmem_i.lock); - up (&inode->i_sem); } } @@ -462,7 +445,7 @@ asmlinkage long sys_shmctl (int shmid, int cmd, struct shmid_ds 
*buf) tbuf.shm_ctime = shp->shm_ctim; tbuf.shm_cpid = shp->shm_cprid; tbuf.shm_lpid = shp->shm_lprid; - tbuf.shm_nattch = file_count (shp->shm_file) - 1; + tbuf.shm_nattch = shp->shm_nattch; shm_unlock(shmid); if(copy_shmid_to_user (buf, &tbuf, version)) return -EFAULT; @@ -512,13 +495,12 @@ asmlinkage long sys_shmctl (int shmid, int cmd, struct shmid_ds *buf) goto out_up; err = shm_checkid(shp, shmid); if (err == 0) { - if (file_count (shp->shm_file) == 1) { + if (shp->shm_nattch){ + shp->shm_flags |= SHM_DEST; + /* Do not find it any more */ + shp->shm_perm.key = IPC_PRIVATE; + } else shm_destroy (shp); - return 0; - } - shp->shm_flags |= SHM_DEST; - /* Do not find it any more */ - shp->shm_perm.key = IPC_PRIVATE; } /* Unlock */ shm_unlock(shmid); @@ -619,13 +601,23 @@ asmlinkage long sys_shmat (int shmid, char *shmaddr, int shmflg, ulong *raddr) return -EACCES; } file = shp->shm_file; - get_file (file); + shp->shm_nattch++; shm_unlock(shmid); down(&current->mm->mmap_sem); user_addr = (void *) do_mmap (file, addr, file->f_dentry->d_inode->i_size, prot, flags, 0); up(&current->mm->mmap_sem); - fput (file); + + down (&shm_ids.sem); + if(!(shp = shm_lock(shmid))) + BUG(); + shp->shm_nattch--; + if(shp->shm_nattch == 0 && + shp->shm_flags & SHM_DEST) + shm_destroy (shp); + shm_unlock(shmid); + up (&shm_ids.sem); + *raddr = (unsigned long) user_addr; err = 0; if (IS_ERR(user_addr)) @@ -684,7 +676,7 @@ static int sysvipc_shm_read_proc(char *buffer, char **start, off_t offset, int l shp->shm_segsz, shp->shm_cprid, shp->shm_lprid, - file_count (shp->shm_file) - 1, + shp->shm_nattch, shp->shm_perm.uid, shp->shm_perm.gid, shp->shm_perm.cuid, diff --git a/kernel/Makefile b/kernel/Makefile index 435f8278a014..9adeb6b2c392 100644 --- a/kernel/Makefile +++ b/kernel/Makefile @@ -9,7 +9,7 @@ O_TARGET := kernel.o -export-objs = signal.o sys.o kmod.o context.o ksyms.o +export-objs = signal.o sys.o kmod.o context.o ksyms.o pm.o obj-y = sched.o dma.o fork.o exec_domain.o panic.o printk.o \
module.o exit.o itimer.o info.o time.o softirq.o resource.o \ diff --git a/kernel/ksyms.c b/kernel/ksyms.c index 545ec28f279c..3d1cd0394ddb 100644 --- a/kernel/ksyms.c +++ b/kernel/ksyms.c @@ -188,6 +188,7 @@ EXPORT_SYMBOL(bread); EXPORT_SYMBOL(__brelse); EXPORT_SYMBOL(__bforget); EXPORT_SYMBOL(ll_rw_block); +EXPORT_SYMBOL(submit_bh); EXPORT_SYMBOL(__wait_on_buffer); EXPORT_SYMBOL(___wait_on_page); EXPORT_SYMBOL(block_write_full_page); @@ -474,6 +475,7 @@ EXPORT_SYMBOL(si_meminfo); EXPORT_SYMBOL(sys_tz); EXPORT_SYMBOL(__wait_on_super); EXPORT_SYMBOL(file_fsync); +EXPORT_SYMBOL(fsync_inode_buffers); EXPORT_SYMBOL(clear_inode); EXPORT_SYMBOL(nr_async_pages); EXPORT_SYMBOL(___strtok); @@ -483,6 +485,7 @@ EXPORT_SYMBOL(get_hash_table); EXPORT_SYMBOL(get_empty_inode); EXPORT_SYMBOL(insert_inode_hash); EXPORT_SYMBOL(remove_inode_hash); +EXPORT_SYMBOL(buffer_insert_inode_queue); EXPORT_SYMBOL(make_bad_inode); EXPORT_SYMBOL(is_bad_inode); EXPORT_SYMBOL(event); diff --git a/kernel/softirq.c b/kernel/softirq.c index 63cd7a1dbd45..fe066399dafa 100644 --- a/kernel/softirq.c +++ b/kernel/softirq.c @@ -303,14 +303,15 @@ void __run_task_queue(task_queue *list) while (next != &head) { void (*f) (void *); struct tq_struct *p; + void *data; p = list_entry(next, struct tq_struct, list); next = next->next; - /* Debug: force an oops from people who delete entries */ - next->prev->next = next->prev->prev = 0; f = p->routine; + data = p->data; + wmb(); p->sync = 0; if (f) - f(p->data); + f(data); } } diff --git a/mm/filemap.c b/mm/filemap.c index 5b11f5fe78a1..b86306226d3b 100644 --- a/mm/filemap.c +++ b/mm/filemap.c @@ -2439,6 +2439,17 @@ generic_file_write(struct file *file,const char *buf,size_t count,loff_t *ppos) if (bytes > count) bytes = count; + /* + * Bring in the user page that we will copy from _first_. + * Otherwise there's a nasty deadlock on copying from the + * same page as we're writing to, without it being marked + * up-to-date. 
+ */ + { volatile unsigned char dummy; + __get_user(dummy, buf); + __get_user(dummy, buf+bytes-1); + } + status = -ENOMEM; /* we'll assign it later anyway */ page = __grab_cache_page(mapping, index, &cached_page); if (!page) diff --git a/mm/mlock.c b/mm/mlock.c index 551d61d39b7e..16e9f947b4d6 100644 --- a/mm/mlock.c +++ b/mm/mlock.c @@ -189,9 +189,6 @@ static int do_mlock(unsigned long start, size_t len, int on) break; } } - spin_lock(&current->mm->page_table_lock); - merge_segments(current->mm, start, end); - spin_unlock(&current->mm->page_table_lock); return error; } @@ -263,9 +260,6 @@ static int do_mlockall(int flags) if (error) break; } - spin_lock(&current->mm->page_table_lock); - merge_segments(current->mm, 0, TASK_SIZE); - spin_unlock(&current->mm->page_table_lock); return error; } diff --git a/mm/mmap.c b/mm/mmap.c index 8a96594fcf4d..f74b2487d8a8 100644 --- a/mm/mmap.c +++ b/mm/mmap.c @@ -338,27 +338,17 @@ unsigned long do_mmap_pgoff(struct file * file, unsigned long addr, unsigned lon goto free_vma; } - /* - * merge_segments may merge our vma, so we can't refer to it - * after the call. Save the values we need now ... - */ - flags = vma->vm_flags; - /* Can addr have changed?? * * Answer: Yes, several device drivers can do it in their * f_op->mmap method.
-DaveM */ + flags = vma->vm_flags; addr = vma->vm_start; - lock_vma_mappings(vma); - spin_lock(&mm->page_table_lock); - __insert_vm_struct(mm, vma); - unlock_vma_mappings(vma); + insert_vm_struct(mm, vma); if (correct_wcount) atomic_inc(&file->f_dentry->d_inode->i_writecount); - merge_segments(mm, vma->vm_start, vma->vm_end); - spin_unlock(&mm->page_table_lock); mm->total_vm += len >> PAGE_SHIFT; if (flags & VM_LOCKED) { @@ -828,6 +818,23 @@ unsigned long do_brk(unsigned long addr, unsigned long len) if (!vm_enough_memory(len >> PAGE_SHIFT)) return -ENOMEM; + flags = vm_flags(PROT_READ|PROT_WRITE|PROT_EXEC, + MAP_FIXED|MAP_PRIVATE) | mm->def_flags; + + flags |= VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC; + + + /* Can we just expand an old anonymous mapping? */ + if (addr) { + struct vm_area_struct * vma = find_vma(mm, addr-1); + if (vma && vma->vm_end == addr && !vma->vm_file && + vma->vm_flags == flags) { + vma->vm_end = addr + len; + goto out; + } + } + + /* * create a vma struct for an anonymous mapping */ @@ -838,30 +845,16 @@ unsigned long do_brk(unsigned long addr, unsigned long len) vma->vm_mm = mm; vma->vm_start = addr; vma->vm_end = addr + len; - vma->vm_flags = vm_flags(PROT_READ|PROT_WRITE|PROT_EXEC, - MAP_FIXED|MAP_PRIVATE) | mm->def_flags; - - vma->vm_flags |= VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC; - vma->vm_page_prot = protection_map[vma->vm_flags & 0x0f]; + vma->vm_flags = flags; + vma->vm_page_prot = protection_map[flags & 0x0f]; vma->vm_ops = NULL; vma->vm_pgoff = 0; vma->vm_file = NULL; vma->vm_private_data = NULL; - /* - * merge_segments may merge our vma, so we can't refer to it - * after the call. Save the values we need now ... 
- */ - flags = vma->vm_flags; - addr = vma->vm_start; + insert_vm_struct(mm, vma); - lock_vma_mappings(vma); - spin_lock(&mm->page_table_lock); - __insert_vm_struct(mm, vma); - unlock_vma_mappings(vma); - merge_segments(mm, vma->vm_start, vma->vm_end); - spin_unlock(&mm->page_table_lock); - +out: mm->total_vm += len >> PAGE_SHIFT; if (flags & VM_LOCKED) { mm->locked_vm += len >> PAGE_SHIFT; @@ -972,84 +965,8 @@ void __insert_vm_struct(struct mm_struct *mm, struct vm_area_struct *vmp) void insert_vm_struct(struct mm_struct *mm, struct vm_area_struct *vmp) { lock_vma_mappings(vmp); + spin_lock(&current->mm->page_table_lock); __insert_vm_struct(mm, vmp); + spin_unlock(&current->mm->page_table_lock); unlock_vma_mappings(vmp); } - -/* Merge the list of memory segments if possible. - * Redundant vm_area_structs are freed. - * This assumes that the list is ordered by address. - * We don't need to traverse the entire list, only those segments - * which intersect or are adjacent to a given interval. - * - * We must already hold the mm semaphore when we get here.. - */ -void merge_segments (struct mm_struct * mm, unsigned long start_addr, unsigned long end_addr) -{ - struct vm_area_struct *prev, *mpnt, *next, *prev1; - - mpnt = find_vma_prev(mm, start_addr, &prev1); - if (!mpnt) - return; - - if (prev1) { - prev = prev1; - } else { - prev = mpnt; - mpnt = mpnt->vm_next; - } - mm->mmap_cache = NULL; /* Kill the cache. */ - - /* prev and mpnt cycle through the list, as long as - * start_addr < mpnt->vm_end && prev->vm_start < end_addr - */ - for ( ; mpnt && prev->vm_start < end_addr ; prev = mpnt, mpnt = next) { - next = mpnt->vm_next; - - /* To share, we must have the same file, operations..
*/ - if ((mpnt->vm_file != prev->vm_file)|| - (mpnt->vm_private_data != prev->vm_private_data) || - (mpnt->vm_ops != prev->vm_ops) || - (mpnt->vm_flags != prev->vm_flags) || - (prev->vm_end != mpnt->vm_start)) - continue; - - /* - * If we have a file or it's a shared memory area - * the offsets must be contiguous.. - */ - if ((mpnt->vm_file != NULL) || (mpnt->vm_flags & VM_SHM)) { - unsigned long off = prev->vm_pgoff; - off += (prev->vm_end - prev->vm_start) >> PAGE_SHIFT; - if (off != mpnt->vm_pgoff) - continue; - } - - /* merge prev with mpnt and set up pointers so the new - * big segment can possibly merge with the next one. - * The old unused mpnt is freed. - */ - if (mm->mmap_avl) - avl_remove(mpnt, &mm->mmap_avl); - prev->vm_end = mpnt->vm_end; - prev->vm_next = mpnt->vm_next; - mm->map_count--; - if (mpnt->vm_ops && mpnt->vm_ops->close) { - mpnt->vm_pgoff += (mpnt->vm_end - mpnt->vm_start) >> PAGE_SHIFT; - mpnt->vm_start = mpnt->vm_end; - spin_unlock(&mm->page_table_lock); - mpnt->vm_ops->close(mpnt); - } else - spin_unlock(&mm->page_table_lock); - - lock_vma_mappings(mpnt); - __remove_shared_vm_struct(mpnt); - unlock_vma_mappings(mpnt); - if (mpnt->vm_file) - fput(mpnt->vm_file); - kmem_cache_free(vm_area_cachep, mpnt); - mpnt = prev; - - spin_lock(&mm->page_table_lock); - } -} diff --git a/mm/mprotect.c b/mm/mprotect.c index e47987f1e4c4..91905c8b1f06 100644 --- a/mm/mprotect.c +++ b/mm/mprotect.c @@ -277,9 +277,6 @@ asmlinkage long sys_mprotect(unsigned long start, size_t len, unsigned long prot break; } } - spin_lock(&current->mm->page_table_lock); - merge_segments(current->mm, start, end); - spin_unlock(&current->mm->page_table_lock); out: up(&current->mm->mmap_sem); return error; diff --git a/mm/mremap.c b/mm/mremap.c index bdbcf4841e58..e237c9442633 100644 --- a/mm/mremap.c +++ b/mm/mremap.c @@ -141,12 +141,7 @@ static inline unsigned long move_vma(struct vm_area_struct * vma, get_file(new_vma->vm_file); if (new_vma->vm_ops && new_vma->vm_ops->open)
new_vma->vm_ops->open(new_vma); - lock_vma_mappings(vma); - spin_lock(&current->mm->page_table_lock); - __insert_vm_struct(current->mm, new_vma); - unlock_vma_mappings(vma); - merge_segments(current->mm, new_vma->vm_start, new_vma->vm_end); - spin_unlock(&current->mm->page_table_lock); + insert_vm_struct(current->mm, new_vma); do_munmap(current->mm, addr, old_len); current->mm->total_vm += new_len >> PAGE_SHIFT; if (new_vma->vm_flags & VM_LOCKED) { diff --git a/mm/shmem.c b/mm/shmem.c index 1d28691ec043..4e6720fa201d 100644 --- a/mm/shmem.c +++ b/mm/shmem.c @@ -210,37 +210,33 @@ static int shmem_writepage(struct page * page) { int error; struct shmem_inode_info *info; - swp_entry_t *entry; + swp_entry_t *entry, swap; info = &((struct inode *)page->mapping->host)->u.shmem_i; if (info->locked) return 1; + swap = __get_swap_page(2); + if (!swap.val) + return 1; + spin_lock(&info->lock); entry = shmem_swp_entry (info, page->index); if (!entry) /* this had been allocted on page allocation */ BUG(); error = -EAGAIN; - if (entry->val) - goto out; - - /* - * 1 means "cannot write out". - * We can't drop dirty pages - * just because we ran out of - * swap.
- */ - error = 1; - *entry = __get_swap_page(2); - if (!entry->val) + if (entry->val) { + __swap_free(swap, 2); goto out; + } + *entry = swap; error = 0; /* Remove the from the page cache */ lru_cache_del(page); remove_inode_page(page); /* Add it to the swap cache */ - add_to_swap_cache(page,*entry); + add_to_swap_cache(page, swap); page_cache_release(page); SetPageDirty(page); info->swapped++; diff --git a/mm/swapfile.c b/mm/swapfile.c index 28963495f6b5..57f815638752 100644 --- a/mm/swapfile.c +++ b/mm/swapfile.c @@ -370,7 +370,8 @@ static int try_to_unuse(unsigned int type) swap_free(entry); return -ENOMEM; } - delete_from_swap_cache(page); + if (PageSwapCache(page)) + delete_from_swap_cache(page); read_lock(&tasklist_lock); for_each_task(p) unuse_process(p->mm, entry, page); diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c index 7091bf82ca52..afed5862ea48 100644 --- a/net/ipv4/ip_fragment.c +++ b/net/ipv4/ip_fragment.c @@ -324,6 +324,7 @@ static struct ipq *ip_frag_create(unsigned hash, struct iphdr *iph) qp->len = 0; qp->meat = 0; qp->fragments = NULL; + qp->iif = 0; /* Initialize a timer for this entry. */ init_timer(&qp->timer); @@ -485,7 +486,8 @@ static void ip_frag_queue(struct ipq *qp, struct sk_buff *skb) else qp->fragments = skb; - qp->iif = skb->dev->ifindex; + if (skb->dev) + qp->iif = skb->dev->ifindex; skb->dev = NULL; qp->meat += skb->len; atomic_add(skb->truesize, &ip_frag_mem); -- 2.39.5