git.neil.brown.name Git - history.git/commitdiff
Import 2.3.23pre3
author Linus Torvalds <torvalds@linuxfoundation.org>
Fri, 23 Nov 2007 20:27:51 +0000 (15:27 -0500)
committer Linus Torvalds <torvalds@linuxfoundation.org>
Fri, 23 Nov 2007 20:27:51 +0000 (15:27 -0500)
80 files changed:
Documentation/Configure.help
arch/i386/config.in
arch/i386/defconfig
arch/i386/kernel/head.S
arch/i386/kernel/irq.c
arch/i386/kernel/setup.c
arch/i386/kernel/smpboot.c
arch/i386/kernel/traps.c
arch/i386/kernel/vm86.c
arch/i386/mm/Makefile
arch/i386/mm/bigmem.c [deleted file]
arch/i386/mm/fault.c
arch/i386/mm/init.c
arch/i386/mm/ioremap.c
drivers/block/ide-dma.c
drivers/block/ide.c
drivers/char/console.c
drivers/char/n_tty.c
drivers/char/serial.c
drivers/char/tty_io.c
drivers/net/eepro100.c
drivers/net/starfire.c
drivers/net/via-rhine.c
drivers/usb/printer.c
fs/buffer.c
fs/dcache.c
fs/exec.c
fs/file.c
fs/inode.c
fs/iobuf.c
fs/nfs/dir.c
fs/nfs/symlink.c
fs/proc/array.c
fs/proc/mem.c
include/asm-i386/bigmem.h [deleted file]
include/asm-i386/bugs.h
include/asm-i386/fixmap.h
include/asm-i386/highmem.h [new file with mode: 0644]
include/asm-i386/io.h
include/asm-i386/page.h
include/asm-i386/pgtable-2level.h [new file with mode: 0644]
include/asm-i386/pgtable-3level.h [new file with mode: 0644]
include/asm-i386/pgtable.h
include/asm-i386/processor.h
include/asm-i386/smp.h
include/linux/bigmem.h [deleted file]
include/linux/binfmts.h
include/linux/bootmem.h [new file with mode: 0644]
include/linux/fs.h
include/linux/highmem.h [new file with mode: 0644]
include/linux/iobuf.h
include/linux/kernel.h
include/linux/mm.h
include/linux/pagemap.h
include/linux/sched.h
include/linux/shm.h
include/linux/slab.h
include/linux/swap.h
include/linux/tty.h
init/main.c
ipc/shm.c
kernel/fork.c
kernel/printk.c
kernel/ptrace.c
mm/Makefile
mm/bigmem.c [deleted file]
mm/bootmem.c [new file with mode: 0644]
mm/filemap.c
mm/highmem.c [new file with mode: 0644]
mm/memory.c
mm/mmap.c
mm/mprotect.c
mm/mremap.c
mm/page_alloc.c
mm/page_io.c
mm/slab.c
mm/swap_state.c
mm/swapfile.c
mm/vmalloc.c
mm/vmscan.c

diff --git a/Documentation/Configure.help b/Documentation/Configure.help
index 9dc8623641a714156e0e48f1c7788cf133c837c9..619bc25aaaec3057595c100d2bacff450e2edc02 100644 (file)
@@ -175,18 +175,25 @@ CONFIG_MATHEMU
   on the Alpha. The only time you would ever not say Y is to say M in
   order to debug the code. Say Y unless you know what you are doing.
 
-Support for over 1Gig of memory
-CONFIG_BIGMEM
-  Linux can use up to 1 Gigabytes (= 2^30 bytes) of physical memory.
-  If you are compiling a kernel which will never run on a machine with
-  more than 1 Gigabyte, answer N here. Otherwise, say Y.
-
-  The actual amount of physical memory may need to be specified using a
-  kernel command line option such as "mem=256M". (Try "man bootparam"
-  or see the documentation of your boot loader (lilo or loadlin) about
-  how to pass options to the kernel at boot time. The lilo procedure
-  is also explained in the SCSI-HOWTO, available from
-  http://metalab.unc.edu/mdw/linux.html#howto .)
+High Memory support
+CONFIG_NOHIGHMEM
+  If you are compiling a kernel which will never run on a machine
+  with more than 1 Gigabyte total physical RAM, answer "off"
+  here (default choice).
+
+  Linux can use up to 64 Gigabytes of physical memory on x86 systems.
+  High memory is all the physical RAM that could not be directly
+  mapped by the kernel - ie. 3GB if there is 4GB RAM in the system,
+  7GB if there is 8GB RAM in the system.
+
+  If 4 Gigabytes physical RAM or less is used then answer "4GB" here.
+
+  If more than 4 Gigabytes is used then answer "64GB" here. This
+  selection turns Intel PAE (Physical Address Extension) mode on.
+  PAE implements 3-level paging on IA32 processors. PAE is fully
+  supported by Linux, PAE mode is implemented on all recent Intel
+  processors (PPro and better). NOTE: The "64GB" kernel will not
+  boot on CPUs that do not support PAE!
 
 Normal PC floppy disk support
 CONFIG_BLK_DEV_FD
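
The arithmetic behind the help text above is worth spelling out: whatever the kernel cannot map directly becomes "high memory". Below is a minimal stand-alone sketch, assuming the roughly 1GB direct-map limit quoted in the text; the program is illustrative only and not part of the patch.

#include <stdio.h>

/* Illustrative sketch of the arithmetic in the help text above: with a
 * 3GB/1GB split roughly the first 1GB of RAM is directly mapped and the
 * rest is "high memory".  (Not part of the patch.) */
int main(void)
{
	const unsigned long long direct_map_mb = 1024;	/* per the help text */
	const unsigned long long ram_mb[] = { 512, 1024, 4096, 8192, 65536 };

	for (int i = 0; i < 5; i++) {
		unsigned long long high = ram_mb[i] > direct_map_mb
					? ram_mb[i] - direct_map_mb : 0;
		printf("%6lluMB RAM -> %6lluMB low + %6lluMB high\n",
		       ram_mb[i], ram_mb[i] - high, high);
	}
	return 0;
}

For 4GB and 8GB of RAM this reproduces the 3GB and 7GB figures quoted above; the exact low-memory limit is a little below 1GB once the vmalloc reserve is subtracted, as the MAXMEM computation in the setup.c hunk further down shows.
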
@@ -12180,18 +12187,44 @@ Include support for the NetWinder
 CONFIG_ARCH_NETWINDER
   Say Y here if you intend to run this kernel on the NetWinder.
 
-Maximum Physical Memory
+Virtual/Physical Memory Split
 CONFIG_1GB
-  Linux can use up to 2 Gigabytes (= 2^31 bytes) of physical memory.
-  If you are compiling a kernel which will never run on a machine with
-  more than 1 Gigabyte, answer "1GB" here. Otherwise, say "2GB".
-
-  The actual amount of physical memory should be specified using a
-  kernel command line option such as "mem=256M". (Try "man bootparam"
-  or see the documentation of your boot loader (lilo or loadlin) about
-  how to pass options to the kernel at boot time. The lilo procedure
-  is also explained in the SCSI-HOWTO, available from
-  http://metalab.unc.edu/mdw/linux.html#howto .)
+  If you are compiling a kernel which will never run on a machine
+  with more than 1 Gigabyte total physical RAM, answer "3GB/1GB"
+  here (default choice).
+
+  On 32-bit x86 systems Linux can use up to 64 Gigabytes of physical
+  memory. However 32-bit x86 processors have only 4 Gigabytes of
+  virtual memory space. This option specifies the maximum amount of
+  virtual memory space one process can potentially use. Certain types
+  of applications (eg. database servers) perform better if they have
+  as much virtual memory per process as possible.
+
+  The remaining part of the 4G virtual memory space is used by the
+  kernel to 'permanently map' as much physical memory as possible.
+  Certain types of applications perform better if there is more
+  'permanently mapped' kernel memory.
+
+  [WARNING! Certain boards do not support PCI DMA to physical addresses
+  bigger than 2 Gigabytes. Non-DMA-able memory must not be permanently
+  mapped by the kernel, thus a 1G/3G split will not work on such boxes.]
+
+  As you can see there is no 'perfect split' - the fundamental
+  problem is that 4G of 32-bit virtual memory space is short. So
+  you'll have to pick your own choice - depending on the application
+  load of your box. A 2G/2G split is typically a good choice for a
+  generic Linux server with lots of RAM.
+
+  Any potentially remaining (not permanently mapped) part of physical
+  memory is called 'high memory'. How much total high memory the kernel
+  can handle is influenced by the (next) High Memory configuration option.
+
+  The actual amount of total physical memory will either be
+  autodetected or can be forced by using a kernel command line option
+  such as "mem=256M". (Try "man bootparam" or see the documentation of
+  your boot loader (lilo or loadlin) about how to pass options to the
+  kernel at boot time. The lilo procedure is also explained in the
+  SCSI-HOWTO, available from http://metalab.unc.edu/mdw/linux.html#howto .)
 
 Math emulation
 CONFIG_NWFPE
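
The split described above amounts to choosing where the user/kernel boundary (PAGE_OFFSET on i386) sits inside the 4GB of 32-bit virtual address space. Below is a small stand-alone illustration, using the conventional PAGE_OFFSET values for each split as an assumption; this hunk itself only changes the help text.

#include <stdio.h>

/* Illustrative only: how the virtual/physical split divides 4GB of 32-bit
 * virtual address space between per-process user space and the kernel's
 * permanently mapped region.  (Not part of the patch.) */
int main(void)
{
	static const struct {
		const char *name;
		unsigned long long page_offset;	/* assumed conventional values */
	} splits[] = {
		{ "3GB/1GB (default)", 0xC0000000ULL },
		{ "2GB/2GB",           0x80000000ULL },
		{ "1GB/3GB",           0x40000000ULL },
	};

	for (int i = 0; i < 3; i++)
		printf("%-18s  user: %4lluMB per process, kernel: %4lluMB\n",
		       splits[i].name,
		       splits[i].page_offset >> 20,
		       (0x100000000ULL - splits[i].page_offset) >> 20);
	return 0;
}

More per-process user space leaves less room for the kernel's permanent mapping, which is exactly the trade-off the text describes; whatever physical RAM does not fit under that mapping is then handled as high memory, governed by the previous option.
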
@@ -12802,7 +12835,7 @@ CONFIG_KHTTPD
 # LocalWords:  KERNNAME kname ktype kernelname Kerneltype KERNTYPE Alt RX mdafb
 # LocalWords:  dataless kerneltype SYSNAME Comtrol Rocketport palmtop fbset EGS
 # LocalWords:  nvram SYSRQ SysRq PrintScreen sysrq NVRAMs NvRAM Shortwave RTTY
-# LocalWords:  Sitor Amtor Pactor GTOR hayes TX TMOUT JFdocs BIGMEM DAC IRQ's
+# LocalWords:  Sitor Amtor Pactor GTOR hayes TX TMOUT JFdocs HIGHMEM DAC IRQ's
 # LocalWords:  IDEPCI IDEDMA idedma PDC pdc TRM trm raidtools luthien nuclecu
 # LocalWords:  unam mx miguel koobera uic EMUL solaris pp ieee lpsg co DMAs TOS
 # LocalWords:  BLDCONFIG preloading jumperless BOOTINIT modutils multipath GRE
diff --git a/arch/i386/config.in b/arch/i386/config.in
index 529b648beeab394e84ec4061434599feaa2ca99f..b8f081e65f00b6283566d119d7c013f80ca21df7 100644 (file)
@@ -42,6 +42,18 @@ if [ "$CONFIG_MK7" = "y" ]; then
    define_bool CONFIG_X86_USE_3DNOW y
 fi
 
+choice 'High Memory Support' \
+       "off    CONFIG_NOHIGHMEM \
+       4GB    CONFIG_HIGHMEM4G \
+       64GB   CONFIG_HIGHMEM64G" off
+if [ "$CONFIG_HIGHMEM4G" = "y" ]; then
+   define_bool CONFIG_HIGHMEM y
+fi
+if [ "$CONFIG_HIGHMEM64G" = "y" ]; then
+   define_bool CONFIG_HIGHMEM y
+   define_bool CONFIG_X86_PAE y
+fi
+
 bool 'Math emulation' CONFIG_MATH_EMULATION
 bool 'MTRR (Memory Type Range Register) support' CONFIG_MTRR
 bool 'Symmetric multi-processing support' CONFIG_SMP
@@ -59,7 +71,6 @@ endmenu
 mainmenu_option next_comment
 comment 'General setup'
 
-bool 'Support for over 1Gig of memory' CONFIG_BIGMEM
 bool 'Networking support' CONFIG_NET
 bool 'SGI Visual Workstation support' CONFIG_VISWS
 if [ "$CONFIG_VISWS" = "y" ]; then
diff --git a/arch/i386/defconfig b/arch/i386/defconfig
index 49137cda44ff8a62bc0368ca192cc33ce3a1addf..d6821a99333db08308a23dee4f5037afb4a85b90 100644 (file)
@@ -24,8 +24,9 @@ CONFIG_X86_BSWAP=y
 CONFIG_X86_POPAD_OK=y
 CONFIG_X86_TSC=y
 CONFIG_X86_GOOD_APIC=y
-CONFIG_1GB=y
-# CONFIG_2GB is not set
+CONFIG_NOHIGHMEM=y
+# CONFIG_HIGHMEM4G is not set
+# CONFIG_HIGHMEM64G is not set
 # CONFIG_MATH_EMULATION is not set
 # CONFIG_MTRR is not set
 CONFIG_SMP=y
@@ -40,7 +41,6 @@ CONFIG_MODULES=y
 #
 # General setup
 #
-# CONFIG_BIGMEM is not set
 CONFIG_NET=y
 # CONFIG_VISWS is not set
 CONFIG_X86_IO_APIC=y
@@ -111,7 +111,7 @@ CONFIG_BLK_DEV_IDEPCI=y
 # CONFIG_BLK_DEV_OFFBOARD is not set
 # CONFIG_BLK_DEV_AEC6210 is not set
 CONFIG_BLK_DEV_PIIX=y
-# CONFIG_BLK_DEV_SIS5513 is not set
+# CONFIG_BLK_DEV_PIIX_TUNING is not set
 # CONFIG_IDE_CHIPSETS is not set
 # CONFIG_BLK_CPQ_DA is not set
 
diff --git a/arch/i386/kernel/head.S b/arch/i386/kernel/head.S
index f1aa5058631756b5e7a43f339788e29e1d6d2a25..423308aae0b635c1c7cf0be67087eb9d861a1d3e 100644 (file)
@@ -367,11 +367,13 @@ SYMBOL_NAME(gdt):
 .org 0x1000
 ENTRY(swapper_pg_dir)
        .long 0x00102007
-       .fill __USER_PGD_PTRS-1,4,0
-       /* default: 767 entries */
+       .long 0x00103007
+       .fill BOOT_USER_PGD_PTRS-2,4,0
+       /* default: 766 entries */
        .long 0x00102007
-       /* default: 255 entries */
-       .fill __KERNEL_PGD_PTRS-1,4,0
+       .long 0x00103007
+       /* default: 254 entries */
+       .fill BOOT_KERNEL_PGD_PTRS-2,4,0
 
 /*
  * The page tables are initialized to only 4MB here - the final page
@@ -509,16 +511,156 @@ ENTRY(pg0)
        .long 0x3f0007,0x3f1007,0x3f2007,0x3f3007,0x3f4007,0x3f5007,0x3f6007,0x3f7007
        .long 0x3f8007,0x3f9007,0x3fa007,0x3fb007,0x3fc007,0x3fd007,0x3fe007,0x3ff007
 
-.org 0x3000
-ENTRY(empty_bad_page)
-
+ENTRY(pg1)
+       .long 0x400007,0x001007,0x002007,0x003007,0x004007,0x005007,0x006007,0x007007
+       .long 0x408007,0x009007,0x00a007,0x00b007,0x00c007,0x00d007,0x00e007,0x00f007
+       .long 0x410007,0x011007,0x012007,0x013007,0x014007,0x015007,0x016007,0x017007
+       .long 0x418007,0x019007,0x01a007,0x01b007,0x01c007,0x01d007,0x01e007,0x01f007
+       .long 0x420007,0x021007,0x022007,0x023007,0x024007,0x025007,0x026007,0x027007
+       .long 0x428007,0x029007,0x02a007,0x02b007,0x02c007,0x02d007,0x02e007,0x02f007
+       .long 0x430007,0x031007,0x032007,0x033007,0x034007,0x035007,0x036007,0x037007
+       .long 0x438007,0x039007,0x03a007,0x03b007,0x03c007,0x03d007,0x03e007,0x03f007
+       .long 0x440007,0x041007,0x042007,0x043007,0x044007,0x045007,0x046007,0x047007
+       .long 0x448007,0x049007,0x04a007,0x04b007,0x04c007,0x04d007,0x04e007,0x04f007
+       .long 0x450007,0x051007,0x052007,0x053007,0x054007,0x055007,0x056007,0x057007
+       .long 0x458007,0x059007,0x05a007,0x05b007,0x05c007,0x05d007,0x05e007,0x05f007
+       .long 0x460007,0x061007,0x062007,0x063007,0x064007,0x065007,0x066007,0x067007
+       .long 0x468007,0x069007,0x06a007,0x06b007,0x06c007,0x06d007,0x06e007,0x06f007
+       .long 0x470007,0x071007,0x072007,0x073007,0x074007,0x075007,0x076007,0x077007
+       .long 0x478007,0x079007,0x07a007,0x07b007,0x07c007,0x07d007,0x07e007,0x07f007
+       .long 0x480007,0x081007,0x082007,0x083007,0x084007,0x085007,0x086007,0x087007
+       .long 0x488007,0x089007,0x08a007,0x08b007,0x08c007,0x08d007,0x08e007,0x08f007
+       .long 0x490007,0x091007,0x092007,0x093007,0x094007,0x095007,0x096007,0x097007
+       .long 0x498007,0x099007,0x09a007,0x09b007,0x09c007,0x09d007,0x09e007,0x09f007
+       .long 0x4a0007,0x0a1007,0x0a2007,0x0a3007,0x0a4007,0x0a5007,0x0a6007,0x0a7007
+       .long 0x4a8007,0x0a9007,0x0aa007,0x0ab007,0x0ac007,0x0ad007,0x0ae007,0x0af007
+       .long 0x4b0007,0x0b1007,0x0b2007,0x0b3007,0x0b4007,0x0b5007,0x0b6007,0x0b7007
+       .long 0x4b8007,0x0b9007,0x0ba007,0x0bb007,0x0bc007,0x0bd007,0x0be007,0x0bf007
+       .long 0x4c0007,0x0c1007,0x0c2007,0x0c3007,0x0c4007,0x0c5007,0x0c6007,0x0c7007
+       .long 0x4c8007,0x0c9007,0x0ca007,0x0cb007,0x0cc007,0x0cd007,0x0ce007,0x0cf007
+       .long 0x4d0007,0x0d1007,0x0d2007,0x0d3007,0x0d4007,0x0d5007,0x0d6007,0x0d7007
+       .long 0x4d8007,0x0d9007,0x0da007,0x0db007,0x0dc007,0x0dd007,0x0de007,0x0df007
+       .long 0x4e0007,0x0e1007,0x0e2007,0x0e3007,0x0e4007,0x0e5007,0x0e6007,0x0e7007
+       .long 0x4e8007,0x0e9007,0x0ea007,0x0eb007,0x0ec007,0x0ed007,0x0ee007,0x0ef007
+       .long 0x4f0007,0x0f1007,0x0f2007,0x0f3007,0x0f4007,0x0f5007,0x0f6007,0x0f7007
+       .long 0x4f8007,0x0f9007,0x0fa007,0x0fb007,0x0fc007,0x0fd007,0x0fe007,0x0ff007
+       .long 0x500007,0x001007,0x002007,0x003007,0x004007,0x005007,0x006007,0x007007
+       .long 0x508007,0x009007,0x00a007,0x00b007,0x00c007,0x00d007,0x00e007,0x00f007
+       .long 0x510007,0x011007,0x012007,0x013007,0x014007,0x015007,0x016007,0x017007
+       .long 0x518007,0x019007,0x01a007,0x01b007,0x01c007,0x01d007,0x01e007,0x01f007
+       .long 0x520007,0x021007,0x022007,0x023007,0x024007,0x025007,0x026007,0x027007
+       .long 0x528007,0x029007,0x02a007,0x02b007,0x02c007,0x02d007,0x02e007,0x02f007
+       .long 0x530007,0x031007,0x032007,0x033007,0x034007,0x035007,0x036007,0x037007
+       .long 0x538007,0x039007,0x03a007,0x03b007,0x03c007,0x03d007,0x03e007,0x03f007
+       .long 0x540007,0x041007,0x042007,0x043007,0x044007,0x045007,0x046007,0x047007
+       .long 0x548007,0x049007,0x04a007,0x04b007,0x04c007,0x04d007,0x04e007,0x04f007
+       .long 0x550007,0x051007,0x052007,0x053007,0x054007,0x055007,0x056007,0x057007
+       .long 0x558007,0x059007,0x05a007,0x05b007,0x05c007,0x05d007,0x05e007,0x05f007
+       .long 0x560007,0x061007,0x062007,0x063007,0x064007,0x065007,0x066007,0x067007
+       .long 0x568007,0x069007,0x06a007,0x06b007,0x06c007,0x06d007,0x06e007,0x06f007
+       .long 0x570007,0x071007,0x072007,0x073007,0x074007,0x075007,0x076007,0x077007
+       .long 0x578007,0x079007,0x07a007,0x07b007,0x07c007,0x07d007,0x07e007,0x07f007
+       .long 0x580007,0x081007,0x082007,0x083007,0x084007,0x085007,0x086007,0x087007
+       .long 0x588007,0x089007,0x08a007,0x08b007,0x08c007,0x08d007,0x08e007,0x08f007
+       .long 0x590007,0x091007,0x092007,0x093007,0x094007,0x095007,0x096007,0x097007
+       .long 0x598007,0x099007,0x09a007,0x09b007,0x09c007,0x09d007,0x09e007,0x09f007
+       .long 0x5a0007,0x0a1007,0x0a2007,0x0a3007,0x0a4007,0x0a5007,0x0a6007,0x0a7007
+       .long 0x5a8007,0x0a9007,0x0aa007,0x0ab007,0x0ac007,0x0ad007,0x0ae007,0x0af007
+       .long 0x5b0007,0x0b1007,0x0b2007,0x0b3007,0x0b4007,0x0b5007,0x0b6007,0x0b7007
+       .long 0x5b8007,0x0b9007,0x0ba007,0x0bb007,0x0bc007,0x0bd007,0x0be007,0x0bf007
+       .long 0x5c0007,0x0c1007,0x0c2007,0x0c3007,0x0c4007,0x0c5007,0x0c6007,0x0c7007
+       .long 0x5c8007,0x0c9007,0x0ca007,0x0cb007,0x0cc007,0x0cd007,0x0ce007,0x0cf007
+       .long 0x5d0007,0x0d1007,0x0d2007,0x0d3007,0x0d4007,0x0d5007,0x0d6007,0x0d7007
+       .long 0x5d8007,0x0d9007,0x0da007,0x0db007,0x0dc007,0x0dd007,0x0de007,0x0df007
+       .long 0x5e0007,0x0e1007,0x0e2007,0x0e3007,0x0e4007,0x0e5007,0x0e6007,0x0e7007
+       .long 0x5e8007,0x0e9007,0x0ea007,0x0eb007,0x0ec007,0x0ed007,0x0ee007,0x0ef007
+       .long 0x5f0007,0x0f1007,0x0f2007,0x0f3007,0x0f4007,0x0f5007,0x0f6007,0x0f7007
+       .long 0x5f8007,0x0f9007,0x0fa007,0x0fb007,0x0fc007,0x0fd007,0x0fe007,0x0ff007
+       .long 0x600007,0x001007,0x002007,0x003007,0x004007,0x005007,0x006007,0x007007
+       .long 0x608007,0x009007,0x00a007,0x00b007,0x00c007,0x00d007,0x00e007,0x00f007
+       .long 0x610007,0x011007,0x012007,0x013007,0x014007,0x015007,0x016007,0x017007
+       .long 0x618007,0x019007,0x01a007,0x01b007,0x01c007,0x01d007,0x01e007,0x01f007
+       .long 0x620007,0x021007,0x022007,0x023007,0x024007,0x025007,0x026007,0x027007
+       .long 0x628007,0x029007,0x02a007,0x02b007,0x02c007,0x02d007,0x02e007,0x02f007
+       .long 0x630007,0x031007,0x032007,0x033007,0x034007,0x035007,0x036007,0x037007
+       .long 0x638007,0x039007,0x03a007,0x03b007,0x03c007,0x03d007,0x03e007,0x03f007
+       .long 0x640007,0x041007,0x042007,0x043007,0x044007,0x045007,0x046007,0x047007
+       .long 0x648007,0x049007,0x04a007,0x04b007,0x04c007,0x04d007,0x04e007,0x04f007
+       .long 0x650007,0x051007,0x052007,0x053007,0x054007,0x055007,0x056007,0x057007
+       .long 0x658007,0x059007,0x05a007,0x05b007,0x05c007,0x05d007,0x05e007,0x05f007
+       .long 0x660007,0x061007,0x062007,0x063007,0x064007,0x065007,0x066007,0x067007
+       .long 0x668007,0x069007,0x06a007,0x06b007,0x06c007,0x06d007,0x06e007,0x06f007
+       .long 0x670007,0x071007,0x072007,0x073007,0x074007,0x075007,0x076007,0x077007
+       .long 0x678007,0x079007,0x07a007,0x07b007,0x07c007,0x07d007,0x07e007,0x07f007
+       .long 0x680007,0x081007,0x082007,0x083007,0x084007,0x085007,0x086007,0x087007
+       .long 0x688007,0x089007,0x08a007,0x08b007,0x08c007,0x08d007,0x08e007,0x08f007
+       .long 0x690007,0x091007,0x092007,0x093007,0x094007,0x095007,0x096007,0x097007
+       .long 0x698007,0x099007,0x09a007,0x09b007,0x09c007,0x09d007,0x09e007,0x09f007
+       .long 0x6a0007,0x0a1007,0x0a2007,0x0a3007,0x0a4007,0x0a5007,0x0a6007,0x0a7007
+       .long 0x6a8007,0x0a9007,0x0aa007,0x0ab007,0x0ac007,0x0ad007,0x0ae007,0x0af007
+       .long 0x6b0007,0x0b1007,0x0b2007,0x0b3007,0x0b4007,0x0b5007,0x0b6007,0x0b7007
+       .long 0x6b8007,0x0b9007,0x0ba007,0x0bb007,0x0bc007,0x0bd007,0x0be007,0x0bf007
+       .long 0x6c0007,0x0c1007,0x0c2007,0x0c3007,0x0c4007,0x0c5007,0x0c6007,0x0c7007
+       .long 0x6c8007,0x0c9007,0x0ca007,0x0cb007,0x0cc007,0x0cd007,0x0ce007,0x0cf007
+       .long 0x6d0007,0x0d1007,0x0d2007,0x0d3007,0x0d4007,0x0d5007,0x0d6007,0x0d7007
+       .long 0x6d8007,0x0d9007,0x0da007,0x0db007,0x0dc007,0x0dd007,0x0de007,0x0df007
+       .long 0x6e0007,0x0e1007,0x0e2007,0x0e3007,0x0e4007,0x0e5007,0x0e6007,0x0e7007
+       .long 0x6e8007,0x0e9007,0x0ea007,0x0eb007,0x0ec007,0x0ed007,0x0ee007,0x0ef007
+       .long 0x6f0007,0x0f1007,0x0f2007,0x0f3007,0x0f4007,0x0f5007,0x0f6007,0x0f7007
+       .long 0x6f8007,0x0f9007,0x0fa007,0x0fb007,0x0fc007,0x0fd007,0x0fe007,0x0ff007
+       .long 0x700007,0x001007,0x002007,0x003007,0x004007,0x005007,0x006007,0x007007
+       .long 0x708007,0x009007,0x00a007,0x00b007,0x00c007,0x00d007,0x00e007,0x00f007
+       .long 0x710007,0x011007,0x012007,0x013007,0x014007,0x015007,0x016007,0x017007
+       .long 0x718007,0x019007,0x01a007,0x01b007,0x01c007,0x01d007,0x01e007,0x01f007
+       .long 0x720007,0x021007,0x022007,0x023007,0x024007,0x025007,0x026007,0x027007
+       .long 0x728007,0x029007,0x02a007,0x02b007,0x02c007,0x02d007,0x02e007,0x02f007
+       .long 0x730007,0x031007,0x032007,0x033007,0x034007,0x035007,0x036007,0x037007
+       .long 0x738007,0x039007,0x03a007,0x03b007,0x03c007,0x03d007,0x03e007,0x03f007
+       .long 0x740007,0x041007,0x042007,0x043007,0x044007,0x045007,0x046007,0x047007
+       .long 0x748007,0x049007,0x04a007,0x04b007,0x04c007,0x04d007,0x04e007,0x04f007
+       .long 0x750007,0x051007,0x052007,0x053007,0x054007,0x055007,0x056007,0x057007
+       .long 0x758007,0x059007,0x05a007,0x05b007,0x05c007,0x05d007,0x05e007,0x05f007
+       .long 0x760007,0x061007,0x062007,0x063007,0x064007,0x065007,0x066007,0x067007
+       .long 0x768007,0x069007,0x06a007,0x06b007,0x06c007,0x06d007,0x06e007,0x06f007
+       .long 0x770007,0x071007,0x072007,0x073007,0x074007,0x075007,0x076007,0x077007
+       .long 0x778007,0x079007,0x07a007,0x07b007,0x07c007,0x07d007,0x07e007,0x07f007
+       .long 0x780007,0x081007,0x082007,0x083007,0x084007,0x085007,0x086007,0x087007
+       .long 0x788007,0x089007,0x08a007,0x08b007,0x08c007,0x08d007,0x08e007,0x08f007
+       .long 0x790007,0x091007,0x092007,0x093007,0x094007,0x095007,0x096007,0x097007
+       .long 0x798007,0x099007,0x09a007,0x09b007,0x09c007,0x09d007,0x09e007,0x09f007
+       .long 0x7a0007,0x0a1007,0x0a2007,0x0a3007,0x0a4007,0x0a5007,0x0a6007,0x0a7007
+       .long 0x7a8007,0x0a9007,0x0aa007,0x0ab007,0x0ac007,0x0ad007,0x0ae007,0x0af007
+       .long 0x7b0007,0x0b1007,0x0b2007,0x0b3007,0x0b4007,0x0b5007,0x0b6007,0x0b7007
+       .long 0x7b8007,0x0b9007,0x0ba007,0x0bb007,0x0bc007,0x0bd007,0x0be007,0x0bf007
+       .long 0x7c0007,0x0c1007,0x0c2007,0x0c3007,0x0c4007,0x0c5007,0x0c6007,0x0c7007
+       .long 0x7c8007,0x0c9007,0x0ca007,0x0cb007,0x0cc007,0x0cd007,0x0ce007,0x0cf007
+       .long 0x7d0007,0x0d1007,0x0d2007,0x0d3007,0x0d4007,0x0d5007,0x0d6007,0x0d7007
+       .long 0x7d8007,0x0d9007,0x0da007,0x0db007,0x0dc007,0x0dd007,0x0de007,0x0df007
+       .long 0x7e0007,0x0e1007,0x0e2007,0x0e3007,0x0e4007,0x0e5007,0x0e6007,0x0e7007
+       .long 0x7e8007,0x0e9007,0x0ea007,0x0eb007,0x0ec007,0x0ed007,0x0ee007,0x0ef007
+       .long 0x7f0007,0x0f1007,0x0f2007,0x0f3007,0x0f4007,0x0f5007,0x0f6007,0x0f7007
+       .long 0x7f8007,0x0f9007,0x0fa007,0x0fb007,0x0fc007,0x0fd007,0x0fe007,0x0ff007
 .org 0x4000
-ENTRY(empty_bad_page_table)
+ENTRY(empty_zero_page)
 
 .org 0x5000
-ENTRY(empty_zero_page)
+ENTRY(empty_bad_page)
 
 .org 0x6000
+ENTRY(empty_bad_pte_table)
+
+#if CONFIG_X86_PAE
+
+ .org 0x7000
+ ENTRY(empty_bad_pmd_table)
+
+ .org 0x8000
+
+#else
+
+ .org 0x7000
+
+#endif
 
 /*
  * This starts the data section. Note that the above is all
diff --git a/arch/i386/kernel/irq.c b/arch/i386/kernel/irq.c
index 8ec32928712264561b5ac093890bcf10f7211427..75659aac40dcd178af7b69152c18122a9342847d 100644 (file)
@@ -20,6 +20,7 @@
  * Naturally it's not a 1:1 relation, but there are similarities.
  */
 
+#include <linux/config.h>
 #include <linux/ptrace.h>
 #include <linux/errno.h>
 #include <linux/signal.h>
diff --git a/arch/i386/kernel/setup.c b/arch/i386/kernel/setup.c
index 734cfca65e8458f6b5f8bfea16db38781ca30a7c..2ddad6ff6d7344426a2a08162e67bb6daefb46a6 100644 (file)
@@ -54,7 +54,8 @@
 #ifdef CONFIG_BLK_DEV_RAM
 #include <linux/blk.h>
 #endif
-#include <linux/bigmem.h>
+#include <linux/highmem.h>
+#include <linux/bootmem.h>
 #include <asm/processor.h>
 #include <linux/console.h>
 #include <asm/uaccess.h>
@@ -403,10 +404,9 @@ void __init add_memory_region(unsigned long start,
 
 #define LOWMEMSIZE()   ((*(unsigned short *)__va(0x413)) * 1024)
 
-
 void __init setup_memory_region(void)
 {
-#define E820_DEBUG     0
+#define E820_DEBUG     1
 #ifdef E820_DEBUG
        int i;
 #endif
@@ -432,9 +432,8 @@ void __init setup_memory_region(void)
                memcpy(e820.map, E820_MAP, e820.nr_map * sizeof e820.map[0]);
 #ifdef E820_DEBUG
                for (i=0; i < e820.nr_map; i++) {
-                       printk("e820: %ld @ %08lx ",
-                               (unsigned long)(e820.map[i].size),
-                               (unsigned long)(e820.map[i].addr));
+                       printk("e820: %08x @ %08x ", (int)e820.map[i].size,
+                                               (int)e820.map[i].addr);
                        switch (e820.map[i].type) {
                        case E820_RAM:  printk("(usable)\n");
                                        break;
@@ -464,48 +463,11 @@ void __init setup_memory_region(void)
 } /* setup_memory_region */
 
 
-void __init setup_arch(char **cmdline_p, unsigned long * memory_start_p, unsigned long * memory_end_p)
+static inline void parse_mem_cmdline (char ** cmdline_p)
 {
-       unsigned long high_pfn, max_pfn;
        char c = ' ', *to = command_line, *from = COMMAND_LINE;
        int len = 0;
-       int i;
-       int usermem=0;
-
-#ifdef CONFIG_VISWS
-       visws_get_board_type_and_rev();
-#endif
-
-       ROOT_DEV = to_kdev_t(ORIG_ROOT_DEV);
-       drive_info = DRIVE_INFO;
-       screen_info = SCREEN_INFO;
-       apm_bios_info = APM_BIOS_INFO;
-       if( SYS_DESC_TABLE.length != 0 ) {
-               MCA_bus = SYS_DESC_TABLE.table[3] &0x2;
-               machine_id = SYS_DESC_TABLE.table[0];
-               machine_submodel_id = SYS_DESC_TABLE.table[1];
-               BIOS_revision = SYS_DESC_TABLE.table[2];
-       }
-       aux_device_present = AUX_DEVICE_INFO;
-
-#ifdef CONFIG_BLK_DEV_RAM
-       rd_image_start = RAMDISK_FLAGS & RAMDISK_IMAGE_START_MASK;
-       rd_prompt = ((RAMDISK_FLAGS & RAMDISK_PROMPT_FLAG) != 0);
-       rd_doload = ((RAMDISK_FLAGS & RAMDISK_LOAD_FLAG) != 0);
-#endif
-       setup_memory_region();
-
-       if (!MOUNT_ROOT_RDONLY)
-               root_mountflags &= ~MS_RDONLY;
-       init_mm.start_code = (unsigned long) &_text;
-       init_mm.end_code = (unsigned long) &_etext;
-       init_mm.end_data = (unsigned long) &_edata;
-       init_mm.brk = (unsigned long) &_end;
-
-       code_resource.start = virt_to_bus(&_text);
-       code_resource.end = virt_to_bus(&_etext)-1;
-       data_resource.start = virt_to_bus(&_etext);
-       data_resource.end = virt_to_bus(&_edata)-1;
+       int usermem = 0;
 
        /* Save unparsed command line copy for /proc/cmdline */
        memcpy(saved_command_line, COMMAND_LINE, COMMAND_LINE_SIZE);
@@ -519,8 +481,9 @@ void __init setup_arch(char **cmdline_p, unsigned long * memory_start_p, unsigne
                 * "mem=XXX[KkmM]@XXX[KkmM]" defines a memory region from
                 * <start> to <start>+<mem>, overriding the bios size.
                 */
-               if (c == ' ' && *(const unsigned long *)from == *(const unsigned long *)"mem=") {
-                       if (to != command_line) to--;
+               if (c == ' ' && !memcmp(from, "mem=", 4)) {
+                       if (to != command_line)
+                               to--;
                        if (!memcmp(from+4, "nopentium", 9)) {
                                from += 9+4;
                                boot_cpu_data.x86_capability &= ~X86_FEATURE_PSE;
@@ -542,7 +505,7 @@ void __init setup_arch(char **cmdline_p, unsigned long * memory_start_p, unsigne
                                }
                                mem_size = memparse(from+4, &from);
                                if (*from == '@')
-                                       start_at = memparse(from+1,&from);
+                                       start_at = memparse(from+1, &from);
                                else {
                                        start_at = HIGH_MEMORY;
                                        mem_size -= HIGH_MEMORY;
@@ -559,54 +522,158 @@ void __init setup_arch(char **cmdline_p, unsigned long * memory_start_p, unsigne
        }
        *to = '\0';
        *cmdline_p = command_line;
+}
 
-       /* Find the highest page frame number we have available */
-       max_pfn = 0;
-       for (i=0; i < e820.nr_map; i++) {
-               /* RAM? */
-               if (e820.map[i].type == E820_RAM) {
-                       unsigned long end_pfn = (e820.map[i].addr + e820.map[i].size) >> PAGE_SHIFT;
+void __init setup_arch(char **cmdline_p)
+{
+       unsigned long bootmap_size;
+       unsigned long start_pfn, max_pfn, max_low_pfn;
+       int i;
 
-                       if (end_pfn > max_pfn)
-                               max_pfn = end_pfn;
-               }
+#ifdef CONFIG_VISWS
+       visws_get_board_type_and_rev();
+#endif
+
+       ROOT_DEV = to_kdev_t(ORIG_ROOT_DEV);
+       drive_info = DRIVE_INFO;
+       screen_info = SCREEN_INFO;
+       apm_bios_info = APM_BIOS_INFO;
+       if( SYS_DESC_TABLE.length != 0 ) {
+               MCA_bus = SYS_DESC_TABLE.table[3] &0x2;
+               machine_id = SYS_DESC_TABLE.table[0];
+               machine_submodel_id = SYS_DESC_TABLE.table[1];
+               BIOS_revision = SYS_DESC_TABLE.table[2];
        }
+       aux_device_present = AUX_DEVICE_INFO;
 
-/*
- * We can only allocate a limited amount of direct-mapped memory
- */
-#define VMALLOC_RESERVE        (128 << 20)     /* 128MB for vmalloc and initrd */
-#define MAXMEM         ((unsigned long)(-PAGE_OFFSET-VMALLOC_RESERVE))
-#define MAXMEM_PFN     (MAXMEM >> PAGE_SHIFT)
+#ifdef CONFIG_BLK_DEV_RAM
+       rd_image_start = RAMDISK_FLAGS & RAMDISK_IMAGE_START_MASK;
+       rd_prompt = ((RAMDISK_FLAGS & RAMDISK_PROMPT_FLAG) != 0);
+       rd_doload = ((RAMDISK_FLAGS & RAMDISK_LOAD_FLAG) != 0);
+#endif
+       setup_memory_region();
 
-       high_pfn = MAXMEM_PFN;
-       if (max_pfn < high_pfn)
-               high_pfn = max_pfn;
+       if (!MOUNT_ROOT_RDONLY)
+               root_mountflags &= ~MS_RDONLY;
+       init_mm.start_code = (unsigned long) &_text;
+       init_mm.end_code = (unsigned long) &_etext;
+       init_mm.end_data = (unsigned long) &_edata;
+       init_mm.brk = (unsigned long) &_end;
+
+       code_resource.start = virt_to_bus(&_text);
+       code_resource.end = virt_to_bus(&_etext)-1;
+       data_resource.start = virt_to_bus(&_etext);
+       data_resource.end = virt_to_bus(&_edata)-1;
+
+       parse_mem_cmdline(cmdline_p);
+
+#define PFN_UP(x)      (((x) + PAGE_SIZE-1) >> PAGE_SHIFT)
+#define PFN_DOWN(x)    ((x) >> PAGE_SHIFT)
+#define PFN_PHYS(x)    ((x) << PAGE_SHIFT)
 
 /*
- * But the bigmem stuff may be able to use more of it
- * (but currently only up to about 4GB)
+ * 128MB for vmalloc and initrd
  */
-#ifdef CONFIG_BIGMEM
-       #define MAXBIGMEM       ((unsigned long)(~(VMALLOC_RESERVE-1)))
-       #define MAXBIGMEM_PFN   (MAXBIGMEM >> PAGE_SHIFT)
-       if (max_pfn > MAX_PFN)
-               max_pfn = MAX_PFN;
-
-/* When debugging, make half of "normal" memory be BIGMEM memory instead */
-#ifdef BIGMEM_DEBUG
-       high_pfn >>= 1;
-#endif
+#define VMALLOC_RESERVE        (unsigned long)(128 << 20)
+#define MAXMEM         (unsigned long)(-PAGE_OFFSET-VMALLOC_RESERVE)
+#define MAXMEM_PFN     PFN_DOWN(MAXMEM)
+
+       /*
+        * partially used pages are not usable - thus
+        * we are rounding upwards:
+        */
+       start_pfn = PFN_UP(__pa(&_end));
+
+       /*
+        * Find the highest page frame number we have available
+        */
+       max_pfn = 0;
+       for (i = 0; i < e820.nr_map; i++) {
+               unsigned long curr_pfn;
+               /* RAM? */
+               if (e820.map[i].type != E820_RAM)
+                       continue;
+               curr_pfn = PFN_DOWN(e820.map[i].addr + e820.map[i].size);
+               if (curr_pfn > max_pfn)
+                       max_pfn = curr_pfn;
+       }
 
-       bigmem_start = high_pfn << PAGE_SHIFT;
-       bigmem_end = max_pfn << PAGE_SHIFT;
-       printk(KERN_NOTICE "%ldMB BIGMEM available.\n", (bigmem_end-bigmem_start) >> 20);
+       /*
+        * Determine low and high memory ranges:
+        */
+       max_low_pfn = max_pfn;
+       if (max_low_pfn > MAXMEM_PFN)
+               max_low_pfn = MAXMEM_PFN;
+
+#ifdef CONFIG_HIGHMEM
+       highstart_pfn = highend_pfn = max_pfn;
+       if (max_pfn > MAXMEM_PFN) {
+               highstart_pfn = MAXMEM_PFN;
+               highend_pfn = max_pfn;
+               printk(KERN_NOTICE "%ldMB HIGHMEM available.\n",
+                       pages_to_mb(highend_pfn - highstart_pfn));
+       }
 #endif
+       /*
+        * Initialize the boot-time allocator (with low memory only):
+        */
+       bootmap_size = init_bootmem(start_pfn, max_low_pfn);
 
-       ram_resources[1].end = (high_pfn << PAGE_SHIFT)-1;
+       /*
+        * FIXME: what about high memory?
+        */
+       ram_resources[1].end = PFN_PHYS(max_low_pfn);
 
-       *memory_start_p = (unsigned long) &_end;
-       *memory_end_p = PAGE_OFFSET + (high_pfn << PAGE_SHIFT);
+       /*
+        * Register fully available low RAM pages with the bootmem allocator.
+        */
+       for (i = 0; i < e820.nr_map; i++) {
+               unsigned long curr_pfn, last_pfn, size;
+               /*
+                * Reserve usable low memory
+                */
+               if (e820.map[i].type != E820_RAM)
+                       continue;
+               /*
+                * We are rounding up the start address of usable memory:
+                */
+               curr_pfn = PFN_UP(e820.map[i].addr);
+               if (curr_pfn >= max_low_pfn)
+                       continue;
+               /*
+                * ... and at the end of the usable range downwards:
+                */
+               last_pfn = PFN_DOWN(e820.map[i].addr + e820.map[i].size);
+
+               if (last_pfn > max_low_pfn)
+                       last_pfn = max_low_pfn;
+               size = last_pfn - curr_pfn;
+               free_bootmem(PFN_PHYS(curr_pfn), PFN_PHYS(size));
+       }
+       /*
+        * Reserve the bootmem bitmap itself as well. We do this in two
+        * steps (first step was init_bootmem()) because this catches
+        * the (very unlikely) case of us accidentally initializing the
+        * bootmem allocator with an invalid RAM area.
+        */
+       reserve_bootmem(HIGH_MEMORY, (PFN_PHYS(start_pfn) +
+                        bootmap_size + PAGE_SIZE-1) - (HIGH_MEMORY));
+
+       /*
+        * reserve physical page 0 - it's a special BIOS page on many boxes,
+        * enabling clean reboots, SMP operation, laptop functions.
+        */
+       reserve_bootmem(0, PAGE_SIZE);
+
+#ifdef __SMP__
+       /*
+        * But first pinch a few for the stack/trampoline stuff
+        * FIXME: Don't need the extra page at 4K, but need to fix
+        * trampoline before removing it. (see the GDT stuff)
+        */
+       reserve_bootmem(PAGE_SIZE, PAGE_SIZE);
+       smp_alloc_memory(); /* AP processor realmode stacks in low memory*/
+#endif
 
 #ifdef __SMP__
        /*
@@ -616,10 +683,11 @@ void __init setup_arch(char **cmdline_p, unsigned long * memory_start_p, unsigne
 #endif
 
 #ifdef CONFIG_BLK_DEV_INITRD
+// FIXME needs to do the new bootmem alloc stuff
        if (LOADER_TYPE) {
                initrd_start = INITRD_START ? INITRD_START + PAGE_OFFSET : 0;
                initrd_end = initrd_start+INITRD_SIZE;
-               if (initrd_end > memory_end) {
+               if (initrd_end > (max_low_pfn << PAGE_SHIFT)) {
                        printk("initrd extends beyond end of memory "
                            "(0x%08lx > 0x%08lx)\ndisabling initrd\n",
                            initrd_end,memory_end);
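
The new setup_arch() above leans on three page-frame helpers (PFN_UP, PFN_DOWN, PFN_PHYS) plus the MAXMEM computation to decide how much RAM is "low" (directly mapped and handed to the bootmem allocator) and how much is "high". Here is a stand-alone sketch of that arithmetic, taking the usual i386 PAGE_SHIFT and PAGE_OFFSET values as assumptions; the program is illustrative and not part of the patch.

#include <stdio.h>

/* Stand-alone illustration of the PFN_* helpers and MAXMEM arithmetic used
 * in setup_arch() above.  PAGE_OFFSET/PAGE_SHIFT are the usual i386 values
 * (assumptions for this sketch); 64-bit arithmetic is used so the example
 * also runs on a modern host. */
#define PAGE_SHIFT      12
#define PAGE_SIZE       (1ULL << PAGE_SHIFT)
#define PAGE_OFFSET     0xC0000000ULL

#define PFN_UP(x)       (((x) + PAGE_SIZE - 1) >> PAGE_SHIFT)
#define PFN_DOWN(x)     ((x) >> PAGE_SHIFT)
#define PFN_PHYS(x)     ((unsigned long long)(x) << PAGE_SHIFT)

#define VMALLOC_RESERVE (128ULL << 20)
/* On i386, -PAGE_OFFSET wraps modulo 2^32; spelled out explicitly here: */
#define MAXMEM          (0x100000000ULL - PAGE_OFFSET - VMALLOC_RESERVE)
#define MAXMEM_PFN      PFN_DOWN(MAXMEM)

int main(void)
{
	printf("MAXMEM     = 0x%08llx (%llu MB of directly mappable RAM)\n",
	       MAXMEM, MAXMEM >> 20);
	printf("MAXMEM_PFN = 0x%llx page frames\n", MAXMEM_PFN);

	/* An e820 RAM region is trimmed to whole pages, exactly as in the
	 * free_bootmem() loop above (start rounded up, end rounded down): */
	unsigned long long addr = 0x00100000ULL, size = 0x07EF0C00ULL;
	unsigned long long curr_pfn = PFN_UP(addr);
	unsigned long long last_pfn = PFN_DOWN(addr + size);
	printf("e820 region -> free_bootmem(0x%llx, 0x%llx)\n",
	       PFN_PHYS(curr_pfn), PFN_PHYS(last_pfn - curr_pfn));
	return 0;
}

With these values MAXMEM works out to 896MB (0x38000 page frames), which is why a 3GB/1GB kernel without CONFIG_HIGHMEM tops out just under 1GB of usable RAM. The e820 loop above then rounds each usable region inwards to whole pages before calling free_bootmem(), and reserve_bootmem() takes back the kernel image plus bootmem bitmap, physical page 0, and the SMP trampoline page.
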
diff --git a/arch/i386/kernel/smpboot.c b/arch/i386/kernel/smpboot.c
index 46335ee8fa9cef5a0b0cad5077edf8262a470ab0..f0b3b371e2ea6dbdd87daf07a4a0bd1aa388605d 100644 (file)
@@ -39,6 +39,7 @@
 #include <linux/kernel_stat.h>
 #include <linux/smp_lock.h>
 #include <linux/irq.h>
+#include <linux/bootmem.h>
 
 #include <linux/delay.h>
 #include <linux/mc146818rtc.h>
@@ -630,12 +631,15 @@ static unsigned long __init setup_trampoline(void)
  * We are called very early to get the low memory for the
  * SMP bootup trampoline page.
  */
-unsigned long __init smp_alloc_memory(unsigned long mem_base)
+void __init smp_alloc_memory(void)
 {
-       if (virt_to_phys((void *)mem_base) >= 0x9F000)
+       trampoline_base = (void *) alloc_bootmem_pages(PAGE_SIZE);
+       /*
+        * Has to be in very low memory so we can execute
+        * real-mode AP code.
+        */
+       if (__pa(trampoline_base) >= 0x9F000)
                BUG();
-       trampoline_base = (void *)mem_base;
-       return mem_base + PAGE_SIZE;
 }
 
 /*
@@ -804,11 +808,10 @@ void __init setup_local_APIC(void)
        apic_write(APIC_DFR, value);
 }
 
-unsigned long __init init_smp_mappings(unsigned long memory_start)
+void __init init_smp_mappings(void)
 {
        unsigned long apic_phys;
 
-       memory_start = PAGE_ALIGN(memory_start);
        if (smp_found_config) {
                apic_phys = mp_lapic_addr;
        } else {
@@ -818,11 +821,10 @@ unsigned long __init init_smp_mappings(unsigned long memory_start)
                 * could use the real zero-page, but it's safer
                 * this way if some buggy code writes to this page ...
                 */
-               apic_phys = __pa(memory_start);
-               memset((void *)memory_start, 0, PAGE_SIZE);
-               memory_start += PAGE_SIZE;
+               apic_phys = __pa(alloc_bootmem_pages(PAGE_SIZE));
+               memset((void *)apic_phys, 0, PAGE_SIZE);
        }
-       set_fixmap(FIX_APIC_BASE,apic_phys);
+       set_fixmap(FIX_APIC_BASE, apic_phys);
        dprintk("mapped APIC to %08lx (%08lx)\n", APIC_BASE, apic_phys);
 
 #ifdef CONFIG_X86_IO_APIC
@@ -834,9 +836,8 @@ unsigned long __init init_smp_mappings(unsigned long memory_start)
                        if (smp_found_config) {
                                ioapic_phys = mp_ioapics[i].mpc_apicaddr;
                        } else {
-                               ioapic_phys = __pa(memory_start);
-                               memset((void *)memory_start, 0, PAGE_SIZE);
-                               memory_start += PAGE_SIZE;
+                               ioapic_phys = __pa(alloc_bootmem_pages(PAGE_SIZE));
+                               memset((void *)ioapic_phys, 0, PAGE_SIZE);
                        }
                        set_fixmap(idx,ioapic_phys);
                        dprintk("mapped IOAPIC to %08lx (%08lx)\n",
@@ -845,8 +846,6 @@ unsigned long __init init_smp_mappings(unsigned long memory_start)
                }
        }
 #endif
-
-       return memory_start;
 }
 
 /*
@@ -1112,6 +1111,12 @@ int __init start_secondary(void *unused)
        smp_callin();
        while (!atomic_read(&smp_commenced))
                /* nothing */ ;
+       /*
+        * low-memory mappings have been cleared, flush them from
+        * the local TLBs too.
+        */
+       local_flush_tlb();
+
        return cpu_idle();
 }
 
@@ -1153,7 +1158,6 @@ static int __init fork_by_hand(void)
 static void __init do_boot_cpu(int i)
 {
        unsigned long cfg;
-       pgd_t maincfg;
        struct task_struct *idle;
        unsigned long send_status, accept_status;
        int timeout, num_starts, j;
@@ -1207,9 +1211,6 @@ static void __init do_boot_cpu(int i)
        *((volatile unsigned short *) phys_to_virt(0x467)) = start_eip & 0xf;
        dprintk("3.\n");
 
-       maincfg=swapper_pg_dir[0];
-       ((unsigned long *)swapper_pg_dir)[0]=0x102007;
-
        /*
         * Be paranoid about clearing APIC errors.
         */
@@ -1367,9 +1368,6 @@ static void __init do_boot_cpu(int i)
                cpucount--;
        }
 
-       swapper_pg_dir[0]=maincfg;
-       local_flush_tlb();
-
        /* mark "stuck" area as not stuck */
        *((volatile unsigned long *)phys_to_virt(8192)) = 0;
 }
@@ -1567,14 +1565,9 @@ void __init smp_boot_cpus(void)
 
 #ifndef CONFIG_VISWS
        {
-               unsigned long cfg;
-
                /*
                 * Install writable page 0 entry to set BIOS data area.
                 */
-               cfg = pg0[0];
-               /* writeable, present, addr 0 */
-               pg0[0] = _PAGE_RW | _PAGE_PRESENT | 0;
                local_flush_tlb();
 
                /*
@@ -1584,12 +1577,6 @@ void __init smp_boot_cpus(void)
                CMOS_WRITE(0, 0xf);
 
                *((volatile long *) phys_to_virt(0x467)) = 0;
-
-               /*
-                * Restore old page 0 entry.
-                */
-               pg0[0] = cfg;
-               local_flush_tlb();
        }
 #endif
 
@@ -1646,5 +1633,7 @@ smp_done:
         */
        if (cpu_has_tsc && cpucount)
                synchronize_tsc_bp();
+
+       zap_low_mappings();
 }
 
diff --git a/arch/i386/kernel/traps.c b/arch/i386/kernel/traps.c
index ebd1cd002c0916fd2e80f6a691eb8859965a815e..f66f2363c331aed91f8ccfbfbe70432b74a3af5a 100644 (file)
@@ -581,6 +581,7 @@ asmlinkage void math_emulate(long arg)
 
 #endif /* CONFIG_MATH_EMULATION */
 
+#ifndef CONFIG_M686
 void __init trap_init_f00f_bug(void)
 {
        unsigned long page;
@@ -596,8 +597,8 @@ void __init trap_init_f00f_bug(void)
        pgd = pgd_offset(&init_mm, page);
        pmd = pmd_offset(pgd, page);
        pte = pte_offset(pmd, page);
-       free_page(pte_page(*pte));
-       *pte = mk_pte(&idt_table, PAGE_KERNEL_RO);
+       __free_page(pte_page(*pte));
+       *pte = mk_pte_phys(__pa(&idt_table), PAGE_KERNEL_RO);
        local_flush_tlb();
 
        /*
@@ -608,6 +609,7 @@ void __init trap_init_f00f_bug(void)
        idt = (struct desc_struct *)page;
        __asm__ __volatile__("lidt %0": "=m" (idt_descr));
 }
+#endif
 
 #define _set_gate(gate_addr,type,dpl,addr) \
 do { \
@@ -772,7 +774,7 @@ cobalt_init(void)
 #endif
 void __init trap_init(void)
 {
-       if (readl(0x0FFFD9) == 'E' + ('I'<<8) + ('S'<<16) + ('A'<<24))
+       if (isa_readl(0x0FFFD9) == 'E'+('I'<<8)+('S'<<16)+('A'<<24))
                EISA_bus = 1;
 
        set_trap_gate(0,&divide_error);
diff --git a/arch/i386/kernel/vm86.c b/arch/i386/kernel/vm86.c
index 65dd7e9da921484ee0a9b0895607866cd103bfb8..3fd5262ac5edbda3868168ac19e464aabb5d92eb 100644 (file)
@@ -102,7 +102,7 @@ static void mark_screen_rdonly(struct task_struct * tsk)
        if (pgd_none(*pgd))
                return;
        if (pgd_bad(*pgd)) {
-               printk("vm86: bad pgd entry [%p]:%08lx\n", pgd, pgd_val(*pgd));
+               pgd_ERROR(*pgd);
                pgd_clear(pgd);
                return;
        }
@@ -110,7 +110,7 @@ static void mark_screen_rdonly(struct task_struct * tsk)
        if (pmd_none(*pmd))
                return;
        if (pmd_bad(*pmd)) {
-               printk("vm86: bad pmd entry [%p]:%08lx\n", pmd, pmd_val(*pmd));
+               pmd_ERROR(*pmd);
                pmd_clear(pmd);
                return;
        }
diff --git a/arch/i386/mm/Makefile b/arch/i386/mm/Makefile
index d60bc196923f7bffb29d01d88dfb52a9c0a7f5d0..cee7d4e6d129f323c3cf456d3592f30d1ac57c70 100644 (file)
@@ -10,8 +10,4 @@
 O_TARGET := mm.o
 O_OBJS  := init.o fault.o ioremap.o extable.o
 
-ifeq ($(CONFIG_BIGMEM),y)
-O_OBJS += bigmem.o
-endif
-
 include $(TOPDIR)/Rules.make
diff --git a/arch/i386/mm/bigmem.c b/arch/i386/mm/bigmem.c
deleted file mode 100644 (file)
index 8da0779..0000000
--- a/arch/i386/mm/bigmem.c
+++ /dev/null
@@ -1,33 +0,0 @@
-/*
- * BIGMEM IA32 code and variables.
- *
- * (C) 1999 Andrea Arcangeli, SuSE GmbH, andrea@suse.de
- *          Gerhard Wichert, Siemens AG, Gerhard.Wichert@pdb.siemens.de
- */
-
-#include <linux/mm.h>
-#include <linux/bigmem.h>
-
-unsigned long bigmem_start, bigmem_end;
-
-/* NOTE: fixmap_init alloc all the fixmap pagetables contigous on the
-   physical space so we can cache the place of the first one and move
-   around without checking the pgd every time. */
-pte_t *kmap_pte;
-pgprot_t kmap_prot;
-
-#define kmap_get_fixmap_pte(vaddr)                                     \
-       pte_offset(pmd_offset(pgd_offset_k(vaddr), (vaddr)), (vaddr))
-
-void __init kmap_init(void)
-{
-       unsigned long kmap_vstart;
-
-       /* cache the first kmap pte */
-       kmap_vstart = __fix_to_virt(FIX_KMAP_BEGIN);
-       kmap_pte = kmap_get_fixmap_pte(kmap_vstart);
-
-       kmap_prot = PAGE_KERNEL;
-       if (boot_cpu_data.x86_capability & X86_FEATURE_PGE)
-               pgprot_val(kmap_prot) |= _PAGE_GLOBAL;
-}
diff --git a/arch/i386/mm/fault.c b/arch/i386/mm/fault.c
index 1f787900511e56c45fe4b28be822efd9d98b5ba9..b2a98859b7ca66e1b273f9cc456bdd28cbec7504 100644 (file)
@@ -76,6 +76,31 @@ bad_area:
        return 0;
 }
 
+static inline void handle_wp_test (void)
+{
+       const unsigned long vaddr = PAGE_OFFSET;
+       pgd_t *pgd;
+       pmd_t *pmd;
+       pte_t *pte;
+
+       /*
+        * make it read/writable temporarily, so that the fault
+        * can be handled.
+        */
+       pgd = swapper_pg_dir + __pgd_offset(vaddr);
+       pmd = pmd_offset(pgd, vaddr);
+       pte = pte_offset(pmd, vaddr);
+       *pte = mk_pte_phys(0, PAGE_KERNEL);
+       local_flush_tlb();
+
+       boot_cpu_data.wp_works_ok = 1;
+       /*
+        * Beware: Black magic here. The printk is needed here to flush
+        * CPU state on certain buggy processors.
+        */
+       printk("Ok");
+}
+
 asmlinkage void do_invalid_op(struct pt_regs *, unsigned long);
 extern unsigned long idt;
 
@@ -226,15 +251,8 @@ no_context:
  * First we check if it was the bootup rw-test, though..
  */
        if (boot_cpu_data.wp_works_ok < 0 &&
-           address == PAGE_OFFSET && (error_code & 1)) {
-               boot_cpu_data.wp_works_ok = 1;
-               pg0[0] = pte_val(mk_pte(PAGE_OFFSET, PAGE_KERNEL));
-               local_flush_tlb();
-               /*
-                * Beware: Black magic here. The printk is needed here to flush
-                * CPU state on certain buggy processors.
-                */
-               printk("Ok");
+                       address == PAGE_OFFSET && (error_code & 1)) {
+               handle_wp_test();
                return;
        }
 
diff --git a/arch/i386/mm/init.c b/arch/i386/mm/init.c
index 98448808973863a01a0c3557c0f19e2666f61415..87e53e13283c51f9a3b083af6241a7ed0dd759de 100644 (file)
@@ -22,7 +22,9 @@
 #ifdef CONFIG_BLK_DEV_INITRD
 #include <linux/blk.h>
 #endif
-#include <linux/bigmem.h>
+#include <linux/highmem.h>
+#include <linux/pagemap.h>
+#include <linux/bootmem.h>
 
 #include <asm/processor.h>
 #include <asm/system.h>
 #include <asm/fixmap.h>
 #include <asm/e820.h>
 
-static unsigned long totalram = 0;
-static unsigned long totalbig = 0;
+unsigned long highstart_pfn, highend_pfn;
+static unsigned long totalram_pages = 0;
+static unsigned long totalhigh_pages = 0;
 
 extern void show_net_buffers(void);
-extern unsigned long init_smp_mappings(unsigned long);
 
-void __bad_pte_kernel(pmd_t *pmd)
+/*
+ * BAD_PAGE is the page that is used for page faults when linux
+ * is out-of-memory. Older versions of linux just did a
+ * do_exit(), but using this instead means there is less risk
+ * for a process dying in kernel mode, possibly leaving an inode
+ * unused etc..
+ *
+ * BAD_PAGETABLE is the accompanying page-table: it is initialized
+ * to point to BAD_PAGE entries.
+ *
+ * ZERO_PAGE is a special page that is used for zero-initialized
+ * data and COW.
+ */
+
+/*
+ * These are allocated in head.S so that we get proper page alignment.
+ * If you change the size of these then change head.S as well.
+ */
+extern char empty_bad_page[PAGE_SIZE];
+#if CONFIG_X86_PAE
+extern pmd_t empty_bad_pmd_table[PTRS_PER_PMD];
+#endif
+extern pte_t empty_bad_pte_table[PTRS_PER_PTE];
+
+/*
+ * We init them before every return and make them writable-shared.
+ * This guarantees we get out of the kernel in some more or less sane
+ * way.
+ */
+#if CONFIG_X86_PAE
+static pmd_t * get_bad_pmd_table(void)
 {
-       printk("Bad pmd in pte_alloc: %08lx\n", pmd_val(*pmd));
-       pmd_val(*pmd) = _KERNPG_TABLE + __pa(BAD_PAGETABLE);
+       pmd_t v;
+       int i;
+
+       pmd_val(v) = _PAGE_TABLE + __pa(empty_bad_pte_table);
+
+       for (i = 0; i < PAGE_SIZE/sizeof(pmd_t); i++)
+               empty_bad_pmd_table[i] = v;
+
+       return empty_bad_pmd_table;
 }
+#endif
 
-void __bad_pte(pmd_t *pmd)
+static pte_t * get_bad_pte_table(void)
 {
-       printk("Bad pmd in pte_alloc: %08lx\n", pmd_val(*pmd));
-       pmd_val(*pmd) = _PAGE_TABLE + __pa(BAD_PAGETABLE);
+       pte_t v;
+       int i;
+
+       v = pte_mkdirty(mk_pte_phys(__pa(empty_bad_page), PAGE_SHARED));
+
+       for (i = 0; i < PAGE_SIZE/sizeof(pte_t); i++)
+               empty_bad_pte_table[i] = v;
+
+       return empty_bad_pte_table;
+}
+
+
+
+void __handle_bad_pmd(pmd_t *pmd)
+{
+       pmd_ERROR(*pmd);
+       pmd_val(*pmd) = _PAGE_TABLE + __pa(get_bad_pte_table());
+}
+
+void __handle_bad_pmd_kernel(pmd_t *pmd)
+{
+       pmd_ERROR(*pmd);
+       pmd_val(*pmd) = _KERNPG_TABLE + __pa(get_bad_pte_table());
 }
 
 pte_t *get_pte_kernel_slow(pmd_t *pmd, unsigned long offset)
@@ -57,16 +118,16 @@ pte_t *get_pte_kernel_slow(pmd_t *pmd, unsigned long offset)
        pte = (pte_t *) __get_free_page(GFP_KERNEL);
        if (pmd_none(*pmd)) {
                if (pte) {
-                       clear_page((unsigned long)pte);
+                       clear_page(pte);
                        pmd_val(*pmd) = _KERNPG_TABLE + __pa(pte);
                        return pte + offset;
                }
-               pmd_val(*pmd) = _KERNPG_TABLE + __pa(BAD_PAGETABLE);
+               pmd_val(*pmd) = _KERNPG_TABLE + __pa(get_bad_pte_table());
                return NULL;
        }
        free_page((unsigned long)pte);
        if (pmd_bad(*pmd)) {
-               __bad_pte_kernel(pmd);
+               __handle_bad_pmd_kernel(pmd);
                return NULL;
        }
        return (pte_t *) pmd_page(*pmd) + offset;
@@ -79,19 +140,19 @@ pte_t *get_pte_slow(pmd_t *pmd, unsigned long offset)
        pte = (unsigned long) __get_free_page(GFP_KERNEL);
        if (pmd_none(*pmd)) {
                if (pte) {
-                       clear_page(pte);
+                       clear_page((void *)pte);
                        pmd_val(*pmd) = _PAGE_TABLE + __pa(pte);
-                       return (pte_t *)(pte + offset);
+                       return (pte_t *)pte + offset;
                }
-               pmd_val(*pmd) = _PAGE_TABLE + __pa(BAD_PAGETABLE);
+               pmd_val(*pmd) = _PAGE_TABLE + __pa(get_bad_pte_table());
                return NULL;
        }
        free_page(pte);
        if (pmd_bad(*pmd)) {
-               __bad_pte(pmd);
+               __handle_bad_pmd(pmd);
                return NULL;
        }
-       return (pte_t *) (pmd_page(*pmd) + offset);
+       return (pte_t *) pmd_page(*pmd) + offset;
 }
 
 int do_check_pgt_cache(int low, int high)
@@ -110,52 +171,36 @@ int do_check_pgt_cache(int low, int high)
        return freed;
 }
 
-/*
- * BAD_PAGE is the page that is used for page faults when linux
- * is out-of-memory. Older versions of linux just did a
- * do_exit(), but using this instead means there is less risk
- * for a process dying in kernel mode, possibly leaving an inode
- * unused etc..
- *
- * BAD_PAGETABLE is the accompanying page-table: it is initialized
- * to point to BAD_PAGE entries.
- *
- * ZERO_PAGE is a special page that is used for zero-initialized
- * data and COW.
- */
-pte_t * __bad_pagetable(void)
-{
-       extern char empty_bad_page_table[PAGE_SIZE];
-       int d0, d1;
-
-       __asm__ __volatile__("cld ; rep ; stosl"
-                            : "=&D" (d0), "=&c" (d1)
-                            : "a" (pte_val(BAD_PAGE)),
-                            "0" ((long) empty_bad_page_table),
-                            "1" (PAGE_SIZE/4)
-                            : "memory");
-       return (pte_t *) empty_bad_page_table;
-}
+/* NOTE: fixmap_init alloc all the fixmap pagetables contigous on the
+   physical space so we can cache the place of the first one and move
+   around without checking the pgd every time. */
+
+#if CONFIG_HIGHMEM
+pte_t *kmap_pte;
+pgprot_t kmap_prot;
 
-pte_t __bad_page(void)
+#define kmap_get_fixmap_pte(vaddr)                                     \
+       pte_offset(pmd_offset(pgd_offset_k(vaddr), (vaddr)), (vaddr))
+
+void __init kmap_init(void)
 {
-       extern char empty_bad_page[PAGE_SIZE];
-       int d0, d1;
-
-       __asm__ __volatile__("cld ; rep ; stosl"
-                            : "=&D" (d0), "=&c" (d1)
-                            : "a" (0),
-                            "0" ((long) empty_bad_page),
-                            "1" (PAGE_SIZE/4)
-                            : "memory");
-       return pte_mkdirty(mk_pte((unsigned long) empty_bad_page, PAGE_SHARED));
+       unsigned long kmap_vstart;
+
+       /* cache the first kmap pte */
+       kmap_vstart = __fix_to_virt(FIX_KMAP_BEGIN);
+       kmap_pte = kmap_get_fixmap_pte(kmap_vstart);
+
+       kmap_prot = PAGE_KERNEL;
+       if (boot_cpu_data.x86_capability & X86_FEATURE_PGE)
+               pgprot_val(kmap_prot) |= _PAGE_GLOBAL;
 }
+#endif
 
 void show_mem(void)
 {
-       int i,free = 0,total = 0,reserved = 0;
+       int i,free = 0, total = 0, reserved = 0;
        int shared = 0, cached = 0;
-       int bigmem = 0;
+       int highmem = 0;
 
        printk("Mem-info:\n");
        show_free_areas();
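
The comment carried over with kmap_init() above explains the trick: the fixmap page tables are allocated contiguously, so the PTE for the first kmap slot is looked up once, cached in kmap_pte, and later mappings can be installed by a plain PTE store instead of a pgd/pmd/pte walk. The real users live in the new mm/highmem.c, which this page does not show, so the fragment below is only a schematic sketch of the idea, built from helpers that do appear in this diff (set_pte, mk_pte_phys, __flush_tlb_one); it maps a single page at the first kmap slot and is meant to be read, not compiled.

/* Schematic sketch (not from the patch): map one physical page at the
 * first fixmap-based kmap slot using the cached kmap_pte / kmap_prot. */
static void *sketch_kmap_first_slot(unsigned long phys)
{
	unsigned long vaddr = __fix_to_virt(FIX_KMAP_BEGIN);

	/* one direct PTE store instead of a full page-table walk */
	set_pte(kmap_pte, mk_pte_phys(phys, kmap_prot));
	__flush_tlb_one(vaddr);

	return (void *) vaddr;
}

The real code additionally has to manage multiple slots and invalidate stale mappings; that bookkeeping is what the new mm/highmem.c adds.
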
@@ -163,8 +208,8 @@ void show_mem(void)
        i = max_mapnr;
        while (i-- > 0) {
                total++;
-               if (PageBIGMEM(mem_map+i))
-                       bigmem++;
+               if (PageHighMem(mem_map+i))
+                       highmem++;
                if (PageReserved(mem_map+i))
                        reserved++;
                else if (PageSwapCache(mem_map+i))
@@ -174,8 +219,8 @@ void show_mem(void)
                else
                        shared += page_count(mem_map+i) - 1;
        }
-       printk("%d pages of RAM\n",total);
-       printk("%d pages of BIGMEM\n",bigmem);
+       printk("%d pages of RAM\n", total);
+       printk("%d pages of HIGHMEM\n",highmem);
        printk("%d reserved pages\n",reserved);
        printk("%d pages shared\n",shared);
        printk("%d pages swap cached\n",cached);
@@ -186,48 +231,30 @@ void show_mem(void)
 #endif
 }
 
-extern unsigned long free_area_init(unsigned long, unsigned long);
-
 /* References to section boundaries */
 
 extern char _text, _etext, _edata, __bss_start, _end;
 extern char __init_begin, __init_end;
 
-/*
- * allocate page table(s) for compile-time fixed mappings
- */
-static unsigned long __init fixmap_init(unsigned long start_mem)
-{
-       pgd_t * pg_dir;
-       unsigned int idx;
-       unsigned long address;
-
-       start_mem = PAGE_ALIGN(start_mem);
-
-       for (idx=1; idx <= __end_of_fixed_addresses; idx += PTRS_PER_PTE)
-       {
-               address = __fix_to_virt(__end_of_fixed_addresses-idx);
-               pg_dir = swapper_pg_dir + (address >> PGDIR_SHIFT);
-               memset((void *)start_mem, 0, PAGE_SIZE);
-               pgd_val(*pg_dir) = _PAGE_TABLE | __pa(start_mem);
-               start_mem += PAGE_SIZE;
-       }
-
-       return start_mem;
-}
-
 static void set_pte_phys (unsigned long vaddr, unsigned long phys)
 {
        pgprot_t prot;
-       pte_t * pte;
+       pgd_t *pgd;
+       pmd_t *pmd;
+       pte_t *pte;
 
-       pte = pte_offset(pmd_offset(pgd_offset_k(vaddr), vaddr), vaddr);
+       pgd = swapper_pg_dir + __pgd_offset(vaddr);
+       pmd = pmd_offset(pgd, vaddr);
+       pte = pte_offset(pmd, vaddr);
        prot = PAGE_KERNEL;
        if (boot_cpu_data.x86_capability & X86_FEATURE_PGE)
                pgprot_val(prot) |= _PAGE_GLOBAL;
        set_pte(pte, mk_pte_phys(phys, prot));
 
-       local_flush_tlb();
+       /*
+        * It's enough to flush this one mapping.
+        */
+       __flush_tlb_one(vaddr);
 }
 
 void set_fixmap (enum fixed_addresses idx, unsigned long phys)
@@ -241,6 +268,123 @@ void set_fixmap (enum fixed_addresses idx, unsigned long phys)
        set_pte_phys (address,phys);
 }
 
+static void __init pagetable_init(void)
+{
+       pgd_t *pgd, *pgd_base;
+       pmd_t *pmd;
+       pte_t *pte;
+       int i, j, k;
+       unsigned long vaddr;
+       unsigned long end = (unsigned long)__va(max_low_pfn*PAGE_SIZE);
+
+       pgd_base = swapper_pg_dir;
+
+       vaddr = PAGE_OFFSET;
+       i = __pgd_offset(vaddr);
+       pgd = pgd_base + i;
+
+       for (; (i < PTRS_PER_PGD) && (vaddr <= end); pgd++, i++) {
+               vaddr = i*PGDIR_SIZE;
+#if CONFIG_X86_PAE
+               pmd = (pmd_t *) alloc_bootmem_pages(PAGE_SIZE);
+               memset((void*)pmd, 0, PAGE_SIZE);
+               pgd_val(*pgd) = __pa(pmd) + 0x1;
+#else
+               pmd = (pmd_t *)pgd;
+#endif
+               if (pmd != pmd_offset(pgd, 0))
+                       BUG();
+               for (j = 0; (j < PTRS_PER_PMD) && (vaddr <= end); pmd++, j++) {
+                       vaddr = i*PGDIR_SIZE + j*PMD_SIZE;
+                       if (cpu_has_pse) {
+                               unsigned long __pe;
+
+                               set_in_cr4(X86_CR4_PSE);
+                               boot_cpu_data.wp_works_ok = 1;
+                               __pe = _KERNPG_TABLE + _PAGE_PSE + __pa(vaddr);
+                               /* Make it "global" too if supported */
+                               if (cpu_has_pge) {
+                                       set_in_cr4(X86_CR4_PGE);
+                                       __pe += _PAGE_GLOBAL;
+                               }
+                               pmd_val(*pmd) = __pe;
+                               continue;
+                       }
+
+                       pte = (pte_t *) alloc_bootmem_pages(PAGE_SIZE);
+                       memset((void*)pte, 0, PAGE_SIZE);
+                       pmd_val(*pmd) = _KERNPG_TABLE + __pa(pte);
+
+                       if (pte != pte_offset(pmd, 0))
+                               BUG();
+
+                       for (k = 0;
+                               (k < PTRS_PER_PTE) && (vaddr <= end);
+                                       pte++, k++) {
+                               vaddr = i*PGDIR_SIZE + j*PMD_SIZE + k*PAGE_SIZE;
+                               *pte = mk_pte_phys(__pa(vaddr), PAGE_KERNEL);
+                       }
+               }
+       }
+
+       /*
+        * Fixed mappings, only the page table structure has to be
+        * created - mappings will be set by set_fixmap():
+        */
+
+       vaddr = __fix_to_virt(__end_of_fixed_addresses - 1) & PMD_MASK;
+       i = __pgd_offset(vaddr);
+       j = __pmd_offset(vaddr);
+       pgd = pgd_base + i;
+
+       for ( ; (i < PTRS_PER_PGD) && vaddr; pgd++, i++) {
+#if CONFIG_X86_PAE
+               if (pgd_none(*pgd)) {
+                       pmd = (pmd_t *) alloc_bootmem_pages(PAGE_SIZE);
+                       memset((void*)pmd, 0, PAGE_SIZE);
+                       pgd_val(*pgd) = __pa(pmd) + 0x1;
+                       if (pmd != pmd_offset(pgd, vaddr))
+                               BUG();
+               }
+               pmd = pmd_offset(pgd, vaddr);
+#else
+               pmd = (pmd_t *)pgd;
+#endif
+               for (; (j < PTRS_PER_PMD) && vaddr; pmd++, j++) {
+                       if (pmd_none(*pmd)) {
+                               pte = (pte_t *) alloc_bootmem_pages(PAGE_SIZE);
+                               memset((void*)pte, 0, PAGE_SIZE);
+                               pmd_val(*pmd) = _KERNPG_TABLE + __pa(pte);
+                               if (pte != pte_offset(pmd, 0))
+                                       BUG();
+                       }
+                       vaddr += PMD_SIZE;
+               }
+               j = 0;
+       }
+
+#if CONFIG_X86_PAE
+       /*
+        * Add low memory identity-mappings - SMP needs it when
+        * starting up on an AP from real-mode. In the non-PAE
+        * case we already have these mappings through head.S.
+        * All user-space mappings are explicitly cleared after
+        * SMP startup.
+        */
+       pgd_base[0] = pgd_base[USER_PTRS_PER_PGD];
+#endif
+}
+
+void __init zap_low_mappings (void)
+{
+       int i;
+       /*
+        * Zap initial low-memory mappings:
+        */
+       for (i = 0; i < USER_PTRS_PER_PGD; i++)
+               pgd_clear(swapper_pg_dir + i);
+}
+
 /*
  * paging_init() sets up the page tables - note that the first 4MB are
  * already mapped by head.S.
@@ -248,89 +392,36 @@ void set_fixmap (enum fixed_addresses idx, unsigned long phys)
  * This routine also unmaps the page at virtual kernel address 0, so
  * that we can trap those pesky NULL-reference errors in the kernel.
  */
-unsigned long __init paging_init(unsigned long start_mem, unsigned long end_mem)
+void __init paging_init(void)
 {
-       pgd_t * pg_dir;
-       pte_t * pg_table;
-       unsigned long tmp;
-       unsigned long address;
+       pagetable_init();
 
-/*
- * Physical page 0 is special; it's not touched by Linux since BIOS
- * and SMM (for laptops with [34]86/SL chips) may need it.  It is read
- * and write protected to detect null pointer references in the
- * kernel.
- * It may also hold the MP configuration table when we are booting SMP.
- */
-       start_mem = PAGE_ALIGN(start_mem);
-       address = PAGE_OFFSET;
-       pg_dir = swapper_pg_dir;
-       /* unmap the original low memory mappings */
-       pgd_val(pg_dir[0]) = 0;
-
-       /* Map whole memory from PAGE_OFFSET */
-       pg_dir += USER_PGD_PTRS;
-       while (address < end_mem) {
-               /*
-                * If we're running on a Pentium CPU, we can use the 4MB
-                * page tables. 
-                *
-                * The page tables we create span up to the next 4MB
-                * virtual memory boundary, but that's OK as we won't
-                * use that memory anyway.
-                */
-               if (boot_cpu_data.x86_capability & X86_FEATURE_PSE) {
-                       unsigned long __pe;
-
-                       set_in_cr4(X86_CR4_PSE);
-                       boot_cpu_data.wp_works_ok = 1;
-                       __pe = _KERNPG_TABLE + _PAGE_4M + __pa(address);
-                       /* Make it "global" too if supported */
-                       if (boot_cpu_data.x86_capability & X86_FEATURE_PGE) {
-                               set_in_cr4(X86_CR4_PGE);
-                               __pe += _PAGE_GLOBAL;
-                       }
-                       pgd_val(*pg_dir) = __pe;
-                       pg_dir++;
-                       address += 4*1024*1024;
-                       continue;
-               }
+       __asm__( "movl %%ecx,%%cr3\n" ::"c"(__pa(swapper_pg_dir)));
 
-               /*
-                * We're on a [34]86, use normal page tables.
-                * pg_table is physical at this point
-                */
-               pg_table = (pte_t *) (PAGE_MASK & pgd_val(*pg_dir));
-               if (!pg_table) {
-                       pg_table = (pte_t *) __pa(start_mem);
-                       start_mem += PAGE_SIZE;
-               }
+#if CONFIG_X86_PAE
+       /*
+        * We will bail out later - printk doesn't work right now so
+        * the user would just see a hanging kernel.
+        */
+       if (cpu_has_pae)
+               set_in_cr4(X86_CR4_PAE);
+#endif
+
+       __flush_tlb();
 
-               pgd_val(*pg_dir) = _PAGE_TABLE | (unsigned long) pg_table;
-               pg_dir++;
-
-               /* now change pg_table to kernel virtual addresses */
-               pg_table = (pte_t *) __va(pg_table);
-               for (tmp = 0 ; tmp < PTRS_PER_PTE ; tmp++,pg_table++) {
-                       pte_t pte = mk_pte(address, PAGE_KERNEL);
-                       if (address >= end_mem)
-                               pte_val(pte) = 0;
-                       set_pte(pg_table, pte);
-                       address += PAGE_SIZE;
-               }
-       }
-       start_mem = fixmap_init(start_mem);
 #ifdef __SMP__
-       start_mem = init_smp_mappings(start_mem);
+       init_smp_mappings();
 #endif
-       local_flush_tlb();
 
-#ifndef CONFIG_BIGMEM
-       return free_area_init(start_mem, end_mem);
-#else
+#ifdef CONFIG_HIGHMEM
        kmap_init(); /* run after fixmap_init */
-       return free_area_init(start_mem, bigmem_end + PAGE_OFFSET);
 #endif
+#ifdef CONFIG_HIGHMEM
+       free_area_init(highend_pfn);
+#else
+       free_area_init(max_low_pfn);
+#endif
+       return;
 }
 
 /*
@@ -341,23 +432,38 @@ unsigned long __init paging_init(unsigned long start_mem, unsigned long end_mem)
 
 void __init test_wp_bit(void)
 {
-       unsigned char tmp_reg;
-       unsigned long old = pg0[0];
+/*
+ * Ok, all PAE-capable CPUs are definitely handling the WP bit right.
+ */
+//#ifndef CONFIG_X86_PAE
+       const unsigned long vaddr = PAGE_OFFSET;
+       pgd_t *pgd;
+       pmd_t *pmd;
+       pte_t *pte, old_pte;
+       char tmp_reg;
 
        printk("Checking if this processor honours the WP bit even in supervisor mode... ");
-       pg0[0] = pte_val(mk_pte(PAGE_OFFSET, PAGE_READONLY));
+
+       pgd = swapper_pg_dir + __pgd_offset(vaddr);
+       pmd = pmd_offset(pgd, vaddr);
+       pte = pte_offset(pmd, vaddr);
+       old_pte = *pte;
+       *pte = mk_pte_phys(0, PAGE_READONLY);
        local_flush_tlb();
+
        __asm__ __volatile__(
                "jmp 1f; 1:\n"
                "movb %0,%1\n"
                "movb %1,%0\n"
                "jmp 1f; 1:\n"
-               :"=m" (*(char *) __va(0)),
+               :"=m" (*(char *) vaddr),
                 "=q" (tmp_reg)
                :/* no inputs */
                :"memory");
-       pg0[0] = old;
+
+       *pte = old_pte;
        local_flush_tlb();
+
        if (boot_cpu_data.wp_works_ok < 0) {
                boot_cpu_data.wp_works_ok = 0;
                printk("No.\n");
@@ -366,136 +472,95 @@ void __init test_wp_bit(void)
 #endif
        } else
                printk(".\n");
+//#endif
 }
 
-static void __init mem_init_region(unsigned long pfn, unsigned long count, unsigned long start_mem_pfn)
+static inline int page_is_ram (unsigned long pagenr)
 {
-       printk("memory region: %luk @ %08lx000\n", count << 2, pfn);
+       int i;
 
-       do {
-               if (pfn >= max_mapnr)
-                       break;
+       for (i = 0; i < e820.nr_map; i++) {
+               unsigned long addr, size;
 
-               /* Avoid the kernel mapping between HIGH_MEMORY and "start_mem".. */
-               if (pfn < (HIGH_MEMORY >> PAGE_SHIFT) || pfn >= start_mem_pfn)
-                       clear_bit(PG_reserved, &mem_map[pfn].flags);
-
-               pfn++;
-       } while (--count > 0);
+               if (e820.map[i].type != E820_RAM)       /* not usable memory */
+                       continue;
+               addr = (e820.map[i].addr+PAGE_SIZE-1) >> PAGE_SHIFT;
+               size = e820.map[i].size >> PAGE_SHIFT;
+               if  ((pagenr >= addr) && (pagenr < addr+size))
+                       return 1;
+       }
+       return 0;
 }
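
page_is_ram() converts each e820 byte range into page-frame numbers: the start address is rounded up to a whole page and the size is truncated to whole pages, so partial pages at the edges are not treated as usable RAM. A small sketch of that conversion with a made-up e820 entry, assuming 4kB pages:

#include <stdio.h>

#define PAGE_SHIFT 12
#define PAGE_SIZE  (1UL << PAGE_SHIFT)

int main(void)
{
        unsigned long long addr = 0x100000;    /* hypothetical e820 RAM entry */
        unsigned long long size = 0x3f00000;

        unsigned long first = (addr + PAGE_SIZE - 1) >> PAGE_SHIFT;  /* round start up */
        unsigned long count = size >> PAGE_SHIFT;                    /* whole pages only */

        printf("RAM pages %lu..%lu\n", first, first + count - 1);
        return 0;
}
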
 
-void __init mem_init(unsigned long start_mem, unsigned long end_mem)
+void __init mem_init(void)
 {
-       unsigned long start_low_mem = PAGE_SIZE;
        int codepages = 0;
        int reservedpages = 0;
        int datapages = 0;
        int initpages = 0;
-       unsigned long tmp;
-       int i, avail;
-
-       end_mem &= PAGE_MASK;
-#ifdef CONFIG_BIGMEM
-       bigmem_start = PAGE_ALIGN(bigmem_start);
-       bigmem_end &= PAGE_MASK;
-#endif
-       high_memory = (void *) end_mem;
-#ifndef CONFIG_BIGMEM
-       max_mapnr = num_physpages = MAP_NR(end_mem);
+#ifdef CONFIG_HIGHMEM
+       int tmp;
+
+       if (!mem_map)
+               BUG();
+       highmem_start_page = mem_map + highstart_pfn;
+       /* cache the highmem_mapnr */
+       highmem_mapnr = highstart_pfn;
+       max_mapnr = num_physpages = highend_pfn;
 #else
-       max_mapnr = num_physpages = PHYSMAP_NR(bigmem_end);
-       /* cache the bigmem_mapnr */
-       bigmem_mapnr = PHYSMAP_NR(bigmem_start);
+       max_mapnr = num_physpages = max_low_pfn;
 #endif
+       high_memory = (void *) __va(max_low_pfn * PAGE_SIZE);
 
        /* clear the zero-page */
        memset(empty_zero_page, 0, PAGE_SIZE);
 
-       /* mark usable pages in the mem_map[] */
-       start_low_mem = PAGE_ALIGN(start_low_mem)+PAGE_OFFSET;
+       /* this will put all low memory onto the freelists */
+       totalram_pages += free_all_bootmem();
 
-#ifdef __SMP__
-       /*
-        * But first pinch a few for the stack/trampoline stuff
-        *      FIXME: Don't need the extra page at 4K, but need to fix
-        *      trampoline before removing it. (see the GDT stuff)
-        *
-        */
-       start_low_mem += PAGE_SIZE;                             /* 32bit startup code */
-       start_low_mem = smp_alloc_memory(start_low_mem);        /* AP processor stacks */
-#endif
-       start_mem = PAGE_ALIGN(start_mem);
+#ifdef CONFIG_HIGHMEM
+       for (tmp = highstart_pfn; tmp < highend_pfn; tmp++) {
+               struct page *page = mem_map + tmp;
 
-       /* walk the whitelist, unreserving good memory
-        */
-       for (avail = i = 0; i < e820.nr_map; i++) {
-               unsigned long start_pfn, end_pfn;
-
-               if (e820.map[i].type != E820_RAM)       /* not usable memory */
-                       continue;
-
-               start_pfn = (e820.map[i].addr + PAGE_SIZE - 1) >> PAGE_SHIFT;
-               end_pfn = (e820.map[i].addr + e820.map[i].size) >> PAGE_SHIFT;
-
-               /* We have a certain amount of low memory reserved */
-               if (start_pfn < MAP_NR(start_low_mem))
-                       start_pfn = MAP_NR(start_low_mem);
-
-               if (end_pfn <= start_pfn)
-                       continue;
-
-               mem_init_region(start_pfn, end_pfn - start_pfn, MAP_NR(start_mem));
-       }
-
-       for (tmp = PAGE_OFFSET ; tmp < end_mem ; tmp += PAGE_SIZE) {
-               if (tmp >= MAX_DMA_ADDRESS)
-                       clear_bit(PG_DMA, &mem_map[MAP_NR(tmp)].flags);
-               if (PageReserved(mem_map+MAP_NR(tmp))) {
-                       if (tmp >= (unsigned long) &_text && tmp < (unsigned long) &_edata) {
-                               if (tmp < (unsigned long) &_etext)
-                                       codepages++;
-                               else
-                                       datapages++;
-                       } else if (tmp >= (unsigned long) &__init_begin
-                                  && tmp < (unsigned long) &__init_end)
-                               initpages++;
-                       else if (tmp >= (unsigned long) &__bss_start
-                                && tmp < (unsigned long) start_mem)
-                               datapages++;
-                       else
-                               reservedpages++;
+               if (!page_is_ram(tmp)) {
+                       SetPageReserved(page);
                        continue;
                }
-               set_page_count(mem_map+MAP_NR(tmp), 1);
-               totalram += PAGE_SIZE;
-#ifdef CONFIG_BLK_DEV_INITRD
-               if (!initrd_start || (tmp < initrd_start || tmp >= initrd_end))
-#endif
-                       free_page(tmp);
+               ClearPageReserved(page);
+               set_bit(PG_highmem, &page->flags);
+               atomic_set(&page->count, 1);
+               __free_page(page);
+               totalhigh_pages++;
        }
-#ifdef CONFIG_BIGMEM
-       for (tmp = bigmem_start; tmp < bigmem_end;  tmp += PAGE_SIZE) {
-               clear_bit(PG_reserved, &mem_map[PHYSMAP_NR(tmp)].flags);
-               set_bit(PG_BIGMEM, &mem_map[PHYSMAP_NR(tmp)].flags);
-               atomic_set(&mem_map[PHYSMAP_NR(tmp)].count, 1);
-               free_page(tmp + PAGE_OFFSET);
-               totalbig += PAGE_SIZE;
-       }
-       totalram += totalbig;
+       totalram_pages += totalhigh_pages;
 #endif
-       printk("Memory: %luk/%luk available (%dk kernel code, %dk reserved, %dk data, %dk init, %dk bigmem)\n",
+       printk("Memory: %luk/%luk available (%dk kernel code, %dk reserved, %dk data, %dk init, %ldk highmem)\n",
                (unsigned long) nr_free_pages << (PAGE_SHIFT-10),
                max_mapnr << (PAGE_SHIFT-10),
                codepages << (PAGE_SHIFT-10),
                reservedpages << (PAGE_SHIFT-10),
                datapages << (PAGE_SHIFT-10),
                initpages << (PAGE_SHIFT-10),
-               (int) (totalbig >> 10)
+               (unsigned long) (totalhigh_pages << (PAGE_SHIFT-10))
               );
 
+#if CONFIG_X86_PAE
+       if (!cpu_has_pae)
+               panic("cannot execute a PAE-enabled kernel on a PAE-incapable CPU!");
+#endif
        if (boot_cpu_data.wp_works_ok < 0)
                test_wp_bit();
 
+       /*
+        * Subtle. SMP is doing its boot stuff late (because it has to
+        * fork idle threads) - but it also needs low mappings for the
+        * protected-mode entry to work. We zap these entries only after
+        * the WP-bit has been tested.
+        */
+#ifndef CONFIG_SMP
+       zap_low_mappings();
+#endif
+
 }
 
 void free_initmem(void)
@@ -504,21 +569,22 @@ void free_initmem(void)
        
        addr = (unsigned long)(&__init_begin);
        for (; addr < (unsigned long)(&__init_end); addr += PAGE_SIZE) {
-               mem_map[MAP_NR(addr)].flags &= ~(1 << PG_reserved);
+               ClearPageReserved(mem_map + MAP_NR(addr));
                set_page_count(mem_map+MAP_NR(addr), 1);
                free_page(addr);
-               totalram += PAGE_SIZE;
+               totalram_pages++;
        }
        printk ("Freeing unused kernel memory: %dk freed\n", (&__init_end - &__init_begin) >> 10);
 }
 
 void si_meminfo(struct sysinfo *val)
 {
-       val->totalram = totalram;
+       val->totalram = totalram_pages;
        val->sharedram = 0;
-       val->freeram = nr_free_pages << PAGE_SHIFT;
-       val->bufferram = atomic_read(&buffermem);
-       val->totalbig = totalbig;
-       val->freebig = nr_free_bigpages << PAGE_SHIFT;
+       val->freeram = nr_free_pages;
+       val->bufferram = atomic_read(&buffermem_pages);
+       val->totalhigh = totalhigh_pages;
+       val->freehigh = nr_free_highpages;
+       val->mem_unit = PAGE_SIZE;
        return;
 }
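
With this change si_meminfo() reports counts in pages rather than bytes, with mem_unit recording the page size, and the "Memory: ..." printk above converts pages to kilobytes by shifting left by (PAGE_SHIFT - 10). A small sketch of that conversion, assuming 4kB pages:

#include <stdio.h>

#define PAGE_SHIFT 12   /* 4kB pages assumed */

int main(void)
{
        unsigned long pages = 32768;                    /* hypothetical page count */
        unsigned long kb = pages << (PAGE_SHIFT - 10);  /* each page is 2^(12-10) = 4 kB */

        printf("%lu pages = %luk (%luMB)\n", pages, kb, kb >> 10);
        return 0;
}
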
index 32f3c33fd47647f01a3091936114da036ea1a4bc..d694553100bc7dc37fcd06a5e9b8570f4cbf49f0 100644 (file)
@@ -20,15 +20,19 @@ static inline void remap_area_pte(pte_t * pte, unsigned long address, unsigned l
        end = address + size;
        if (end > PMD_SIZE)
                end = PMD_SIZE;
+       if (address >= end)
+               BUG();
        do {
-               if (!pte_none(*pte))
+               if (!pte_none(*pte)) {
                        printk("remap_area_pte: page already exists\n");
+                       BUG();
+               }
                set_pte(pte, mk_pte_phys(phys_addr, __pgprot(_PAGE_PRESENT | _PAGE_RW | 
                                        _PAGE_DIRTY | _PAGE_ACCESSED | flags)));
                address += PAGE_SIZE;
                phys_addr += PAGE_SIZE;
                pte++;
-       } while (address < end);
+       } while (address && (address < end));
 }
 
 static inline int remap_area_pmd(pmd_t * pmd, unsigned long address, unsigned long size,
@@ -41,6 +45,8 @@ static inline int remap_area_pmd(pmd_t * pmd, unsigned long address, unsigned lo
        if (end > PGDIR_SIZE)
                end = PGDIR_SIZE;
        phys_addr -= address;
+       if (address >= end)
+               BUG();
        do {
                pte_t * pte = pte_alloc_kernel(pmd, address);
                if (!pte)
@@ -48,7 +54,7 @@ static inline int remap_area_pmd(pmd_t * pmd, unsigned long address, unsigned lo
                remap_area_pte(pte, address, end - address, address + phys_addr, flags);
                address = (address + PMD_SIZE) & PMD_MASK;
                pmd++;
-       } while (address < end);
+       } while (address && (address < end));
        return 0;
 }
 
@@ -61,8 +67,11 @@ static int remap_area_pages(unsigned long address, unsigned long phys_addr,
        phys_addr -= address;
        dir = pgd_offset(&init_mm, address);
        flush_cache_all();
-       while (address < end) {
-               pmd_t *pmd = pmd_alloc_kernel(dir, address);
+       if (address >= end)
+               BUG();
+       do {
+               pmd_t *pmd;
+               pmd = pmd_alloc_kernel(dir, address);
                if (!pmd)
                        return -ENOMEM;
                if (remap_area_pmd(pmd, address, end - address,
@@ -71,7 +80,7 @@ static int remap_area_pages(unsigned long address, unsigned long phys_addr,
                set_pgdir(address, *dir);
                address = (address + PGDIR_SIZE) & PGDIR_MASK;
                dir++;
-       }
+       } while (address && (address < end));
        flush_tlb_all();
        return 0;
 }
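
The remap_area_*() loops now also test the address itself: stepping to the next PMD/PGDIR boundary can overflow to 0 at the very top of a 32-bit address space, in which case a plain "address < end" test would not stop the walk where intended. A sketch of the wrap, using 32-bit arithmetic and the 4MB non-PAE PGDIR_SIZE:

#include <stdint.h>
#include <stdio.h>

#define PGDIR_SIZE 0x400000u
#define PGDIR_MASK (~(PGDIR_SIZE - 1))

int main(void)
{
        uint32_t address = 0xffc00000u;   /* last 4MB slot of a 32-bit address space */

        address = (address + PGDIR_SIZE) & PGDIR_MASK;   /* overflows to 0 */
        if (!address)
                printf("address wrapped to 0 - the extra test ends the loop\n");
        return 0;
}
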
index b1d0979c3ba9c9b7ccb2b5cc0b9e25523732b846..338d51c9a95198d32b772debe4ef23571d0c0cfd 100644 (file)
@@ -461,7 +461,7 @@ int ide_dmaproc (ide_dma_action_t func, ide_drive_t *drive)
 int ide_release_dma (ide_hwif_t *hwif)
 {
        if (hwif->dmatable) {
-               clear_page((unsigned long)hwif->dmatable);      /* clear PRD 1st */
+               clear_page((void *)hwif->dmatable);     /* clear PRD 1st */
                free_page((unsigned long)hwif->dmatable);       /* free PRD 2nd */
        }
        if ((hwif->dma_extra) && (hwif->channel == 0))
index 1a506e3536ebe7601eb04138d701116d3a87bade..86b6c022f55ae03f428cbb22066c95d5ebb437d8 100644 (file)
@@ -923,6 +923,7 @@ void ide_error (ide_drive_t *drive, const char *msg, byte stat)
  */
 void ide_cmd(ide_drive_t *drive, byte cmd, byte nsect, ide_handler_t *handler)
 {
+       drive->timeout = WAIT_CMD;
        ide_set_handler (drive, handler);
        if (IDE_CONTROL_REG)
                OUT_BYTE(drive->ctl,IDE_CONTROL_REG);   /* clear nIEN */
index f452237ab979b73a7c8564a986b396c023f10032..49512fe42e218839d6d402bb73d2170b38b2d1b4 100644 (file)
@@ -94,6 +94,7 @@
 #ifdef CONFIG_APM
 #include <linux/apm_bios.h>
 #endif
+#include <linux/bootmem.h>
 
 #include <asm/io.h>
 #include <asm/system.h>
@@ -2286,7 +2287,7 @@ static void vc_init(unsigned int currcons, unsigned int rows, unsigned int cols,
 struct tty_driver console_driver;
 static int console_refcount;
 
-unsigned long __init con_init(unsigned long kmem_start)
+void __init con_init(void)
 {
        const char *display_desc = NULL;
        unsigned int currcons = 0;
@@ -2295,7 +2296,7 @@ unsigned long __init con_init(unsigned long kmem_start)
                display_desc = conswitchp->con_startup();
        if (!display_desc) {
                fg_console = 0;
-               return kmem_start;
+               return;
        }
 
        memset(&console_driver, 0, sizeof(struct tty_driver));
@@ -2336,19 +2337,18 @@ unsigned long __init con_init(unsigned long kmem_start)
                timer_active |= 1<<BLANK_TIMER;
        }
 
-       /* Unfortunately, kmalloc is not running yet */
-       /* Due to kmalloc roundup allocating statically is more efficient -
-          so provide MIN_NR_CONSOLES for people with very little memory */
+       /*
+        * kmalloc is not running yet - we use the bootmem allocator.
+        */
        for (currcons = 0; currcons < MIN_NR_CONSOLES; currcons++) {
                int j, k ;
 
-               vc_cons[currcons].d = (struct vc_data *) kmem_start;
-               kmem_start += sizeof(struct vc_data);
-               vt_cons[currcons] = (struct vt_struct *) kmem_start;
-               kmem_start += sizeof(struct vt_struct);
+               vc_cons[currcons].d = (struct vc_data *)
+                               alloc_bootmem(sizeof(struct vc_data));
+               vt_cons[currcons] = (struct vt_struct *)
+                               alloc_bootmem(sizeof(struct vt_struct));
                visual_init(currcons, 1);
-               screenbuf = (unsigned short *) kmem_start;
-               kmem_start += screenbuf_size;
+               screenbuf = (unsigned short *) alloc_bootmem(screenbuf_size);
                kmalloced = 0;
                vc_init(currcons, video_num_lines, video_num_columns, 
                        currcons || !sw->con_save_screen);
@@ -2376,8 +2376,6 @@ unsigned long __init con_init(unsigned long kmem_start)
 #endif
 
        init_bh(CONSOLE_BH, console_bh);
-       
-       return kmem_start;
 }
 
 #ifndef VT_SINGLE_DRIVER
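
con_init() no longer bumps a kmem_start cursor by hand; early allocations go through alloc_bootmem() from the new boot-time allocator instead. The old scheme was essentially a bump allocator over the memory following the kernel image. A toy userspace version of that idea (names and sizes made up, purely illustrative):

#include <stdio.h>
#include <stddef.h>

static unsigned char boot_arena[4096];   /* stand-in for memory after the kernel image */
static size_t boot_used;

static void *boot_alloc(size_t size)
{
        void *p = boot_arena + boot_used;

        if (boot_used + size > sizeof(boot_arena))
                return NULL;             /* out of early memory */
        boot_used += size;               /* just advance the cursor */
        return p;
}

int main(void)
{
        struct early_obj { int rows, cols; } *obj = boot_alloc(sizeof(*obj));

        if (obj) {
                obj->rows = 25;
                obj->cols = 80;
                printf("early allocation of %zu bytes, cursor now at %zu\n",
                       sizeof(*obj), boot_used);
        }
        return 0;
}
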
index 9027aa67ee010cda3d8c1889910b6c42b161a0c4..0da69c55c8f9de9bbf19c8bdbb04d7388ce0556f 100644 (file)
@@ -811,7 +811,7 @@ static int n_tty_open(struct tty_struct *tty)
 
        if (!tty->read_buf) {
                tty->read_buf = (unsigned char *)
-                       get_free_page(in_interrupt() ? GFP_ATOMIC : GFP_KERNEL);
+                       get_zeroed_page(in_interrupt() ? GFP_ATOMIC : GFP_KERNEL);
                if (!tty->read_buf)
                        return -ENOMEM;
        }
index 44fd4fe8f1f3c2b72ba553da90cbf0ca2aaa7bef..89c067688012b76e27aea2152c90f3245c150cf3 100644 (file)
@@ -1127,7 +1127,7 @@ static int startup(struct async_struct * info)
        unsigned short ICP;
 #endif
 
-       page = get_free_page(GFP_KERNEL);
+       page = get_zeroed_page(GFP_KERNEL);
        if (!page)
                return -ENOMEM;
 
@@ -2974,7 +2974,7 @@ static int rs_open(struct tty_struct *tty, struct file * filp)
 #endif
 
        if (!tmp_buf) {
-               page = get_free_page(GFP_KERNEL);
+               page = get_zeroed_page(GFP_KERNEL);
                if (!page) {
                        return -ENOMEM;
                }
@@ -4359,10 +4359,9 @@ static struct console sercons = {
 /*
  *     Register console.
  */
-long __init serial_console_init(long kmem_start, long kmem_end)
+void __init serial_console_init(void)
 {
        register_console(&sercons);
-       return kmem_start;
 }
 #endif
 
index 17711734f336bca6f586e4e38cf9bcfd5c63b69e..f7817128180108b1855f2d44e900859502ded606 100644 (file)
@@ -129,7 +129,7 @@ static int tty_fasync(int fd, struct file * filp, int on);
 extern int sx_init (void);
 #endif
 #ifdef CONFIG_8xx
-extern long console_8xx_init(long, long);
+extern int console_8xx_init(void);
 extern int rs_8xx_init(void);
 #endif /* CONFIG_8xx */
 
@@ -798,7 +798,7 @@ static int init_dev(kdev_t device, struct tty_struct **ret_tty)
        tp = o_tp = NULL;
        ltp = o_ltp = NULL;
 
-       tty = (struct tty_struct*) get_free_page(GFP_KERNEL);
+       tty = (struct tty_struct*) get_zeroed_page(GFP_KERNEL);
        if(!tty)
                goto fail_no_mem;
        initialize_tty_struct(tty);
@@ -824,7 +824,7 @@ static int init_dev(kdev_t device, struct tty_struct **ret_tty)
        }
 
        if (driver->type == TTY_DRIVER_TYPE_PTY) {
-               o_tty = (struct tty_struct *) get_free_page(GFP_KERNEL);
+               o_tty = (struct tty_struct *) get_zeroed_page(GFP_KERNEL);
                if (!o_tty)
                        goto free_mem_out;
                initialize_tty_struct(o_tty);
@@ -2062,7 +2062,7 @@ int tty_unregister_driver(struct tty_driver *driver)
  * Just do some early initializations, and do the complex setup
  * later.
  */
-long __init console_init(long kmem_start, long kmem_end)
+void __init console_init(void)
 {
        /* Setup the default TTY line discipline. */
        memset(ldiscs, 0, sizeof(ldiscs));
@@ -2085,16 +2085,15 @@ long __init console_init(long kmem_start, long kmem_end)
         * inform about problems etc..
         */
 #ifdef CONFIG_VT
-       kmem_start = con_init(kmem_start);
+       con_init();
 #endif
 #ifdef CONFIG_SERIAL_CONSOLE
 #ifdef CONFIG_8xx
-       kmem_start = console_8xx_init(kmem_start, kmem_end);
+       console_8xx_init();
 #else  
-       kmem_start = serial_console_init(kmem_start, kmem_end);
+       serial_console_init();
 #endif /* CONFIG_8xx */
 #endif
-       return kmem_start;
 }
 
 static struct tty_driver dev_tty_driver, dev_syscons_driver;
@@ -2109,7 +2108,7 @@ static struct tty_driver dev_console_driver;
  * Ok, now we can initialize the rest of the tty devices and can count
  * on memory allocations, interrupts etc..
  */
-int __init tty_init(void)
+void __init tty_init(void)
 {
        if (sizeof(struct tty_struct) > PAGE_SIZE)
                panic("size of tty structure > PAGE_SIZE!");
@@ -2220,5 +2219,4 @@ int __init tty_init(void)
 #ifdef CONFIG_VT
        vcs_init();
 #endif
-       return 0;
 }
index e1a4a3c917841cf8678d96e449b976bac5e97f5e..aac698b0d41735210a3830dc65ae740b98dd208a 100644 (file)
@@ -1495,7 +1495,7 @@ speedo_rx(struct net_device *dev)
                        rxf = sp->rx_ringp[entry] = (struct RxFD *)skb->tail;
                        skb->dev = dev;
                        skb_reserve(skb, sizeof(struct RxFD));
-                       rxf->rx_buf_addr = virt_to_le32bus(skb->tail);
+                       rxf->rx_buf_addr = virt_to_bus(skb->tail);
                } else {
                        rxf = sp->rx_ringp[entry];
                }
index 09891202cdd85fdcaa6de534fc7d9b96b91fadb2..b036deee4c355fa725b9504c182c52673fe095c9 100644 (file)
@@ -81,6 +81,7 @@ static int full_duplex[MAX_UNITS] = {-1, -1, -1, -1, -1, -1, -1, -1};
 #endif
 
 #include <linux/kernel.h>
+#include <linux/version.h>
 #include <linux/sched.h>
 #include <linux/string.h>
 #include <linux/timer.h>
index 21275376c1cf5cf642d13b88541a6debae7f751f..a05f54bb337a8c1372206d69420892f351463be3 100644 (file)
@@ -111,11 +111,6 @@ static const int multicast_filter_limit = 32;
 
 #ifdef MODULE
 char kernel_version[] = UTS_RELEASE;
-#else
-#ifndef __alpha__
-#define ioremap vremap
-#define iounmap vfree
-#endif
 #endif
 #if defined(MODULE) && LINUX_VERSION_CODE > 0x20115
 MODULE_AUTHOR("Donald Becker <becker@cesdis.gsfc.nasa.gov>");
index dffd765590bda3c3d1b9a4bbe9a8adf175044bc1..8bc2cfc1c90c4f7abeb6ca229f9fd3d1d01658f9 100644 (file)
 #define NAK_TIMEOUT (HZ)                               /* stall wait for printer */
 #define MAX_RETRY_COUNT ((60*60*HZ)/NAK_TIMEOUT)       /* should not take 1 minute a page! */
 
+#define BIG_BUF_SIZE                   8192
+
+/*
+ * USB Printer Requests
+ */
+#define USB_PRINTER_REQ_GET_DEVICE_ID  0
+#define USB_PRINTER_REQ_GET_PORT_STATUS        1
+#define USB_PRINTER_REQ_SOFT_RESET     2
+
 #define MAX_PRINTERS   8
 
 struct pp_usb_data {
        struct usb_device       *pusb_dev;
-       __u8                    isopen;                 /* nz if open */
-       __u8                    noinput;                /* nz if no input stream */
+       __u8                    isopen;                 /* True if open */
+       __u8                    noinput;                /* True if no input stream */
        __u8                    minor;                  /* minor number of device */
        __u8                    status;                 /* last status from device */
        int                     maxin, maxout;          /* max transfer size in and out */
        char                    *obuf;                  /* transfer buffer (out only) */
        wait_queue_head_t       wait_q;                 /* for timeouts */
        unsigned int            last_error;             /* save for checking */
+       int                     bulk_in_ep;             /* Bulk IN endpoint */
+       int                     bulk_out_ep;            /* Bulk OUT endpoint */
+       int                     bulk_in_index;          /* endpoint[bulk_in_index] */
+       int                     bulk_out_index;         /* endpoint[bulk_out_index] */
 };
 
 static struct pp_usb_data *minor_data[MAX_PRINTERS];
 
 #define PPDATA(x) ((struct pp_usb_data *)(x))
 
-unsigned char printer_read_status(struct pp_usb_data *p)
+static unsigned char printer_read_status(struct pp_usb_data *p)
 {
        __u8 status;
-       devrequest dr;
        struct usb_device *dev = p->pusb_dev;
 
-       dr.requesttype = USB_TYPE_CLASS | USB_RT_INTERFACE | 0x80;
-       dr.request = 1;
-       dr.value = 0;
-       dr.index = 0;
-       dr.length = 1;
-       if (dev->bus->op->control_msg(dev, usb_rcvctrlpipe(dev,0), &dr, &status, 1, HZ)) {
-               return 0;
+       if (usb_control_msg(dev, usb_rcvctrlpipe(dev,0),
+               USB_PRINTER_REQ_GET_PORT_STATUS,
+               USB_TYPE_CLASS | USB_RT_INTERFACE | USB_DIR_IN,
+               0, 0, &status, 1, HZ)) {
+               return 0;
        }
        return status;
 }
@@ -90,24 +100,21 @@ static int printer_check_status(struct pp_usb_data *p)
        return status;
 }
 
-void printer_reset(struct pp_usb_data *p)
+static void printer_reset(struct pp_usb_data *p)
 {
-       devrequest dr;
        struct usb_device *dev = p->pusb_dev;
 
-       dr.requesttype = USB_TYPE_CLASS | USB_RECIP_OTHER;
-       dr.request = 2;
-       dr.value = 0;
-       dr.index = 0;
-       dr.length = 0;
-       dev->bus->op->control_msg(dev, usb_sndctrlpipe(dev,0), &dr, NULL, 0, HZ);
+       usb_control_msg(dev, usb_sndctrlpipe(dev,0),
+               USB_PRINTER_REQ_SOFT_RESET,
+               USB_TYPE_CLASS | USB_RECIP_OTHER,
+               0, 0, NULL, 0, HZ);
 }
 
 static int open_printer(struct inode * inode, struct file * file)
 {
        struct pp_usb_data *p;
 
-       if(MINOR(inode->i_rdev) >= MAX_PRINTERS ||
+       if (MINOR(inode->i_rdev) >= MAX_PRINTERS ||
           !minor_data[MINOR(inode->i_rdev)]) {
                return -ENODEV;
        }
@@ -141,7 +148,7 @@ static int close_printer(struct inode * inode, struct file * file)
        p->isopen = 0;
        file->private_data = NULL;
        /* free the resources if the printer is no longer around */
-       if(!p->pusb_dev) {
+       if (!p->pusb_dev) {
                minor_data[p->minor] = NULL;
                kfree(p);
        }
@@ -158,12 +165,7 @@ static ssize_t write_printer(struct file * file,
        unsigned long partial;
        int result = USB_ST_NOERROR;
        int maxretry;
-       int endpoint_num;
-       struct usb_interface_descriptor *interface;
        
-       interface = p->pusb_dev->config->interface->altsetting;
-       endpoint_num = (interface->endpoint[1].bEndpointAddress & 0x0f);
-
        do {
                char *obuf = p->obuf;
                unsigned long thistime;
@@ -179,7 +181,7 @@ static ssize_t write_printer(struct file * file,
                                return bytes_written ? bytes_written : -EINTR;
                        }
                        result = p->pusb_dev->bus->op->bulk_msg(p->pusb_dev,
-                                        usb_sndbulkpipe(p->pusb_dev, endpoint_num),
+                                        usb_sndbulkpipe(p->pusb_dev, p->bulk_out_ep),
                                         obuf, thistime, &partial, HZ*20);
                        if (partial) {
                                obuf += partial;
@@ -187,7 +189,7 @@ static ssize_t write_printer(struct file * file,
                                maxretry = MAX_RETRY_COUNT;
                        }
                        if (result == USB_ST_TIMEOUT) { /* NAK - so hold for a while */
-                               if(!maxretry--)
+                               if (!maxretry--)
                                        return -ETIME;
                                 interruptible_sleep_on_timeout(&p->wait_q, NAK_TIMEOUT);
                                continue;
@@ -214,21 +216,15 @@ static ssize_t read_printer(struct file * file,
        char * buffer, size_t count, loff_t *ppos)
 {
        struct pp_usb_data *p = file->private_data;
-       int read_count;
+       int read_count = 0;
        int this_read;
        char buf[64];
        unsigned long partial;
        int result;
-       int endpoint_num;
-       struct usb_interface_descriptor *interface;
        
-       interface = p->pusb_dev->config->interface->altsetting;
-       endpoint_num = (interface->endpoint[0].bEndpointAddress & 0x0f);
-
        if (p->noinput)
                return -EINVAL;
 
-       read_count = 0;
        while (count) {
                if (signal_pending(current)) {
                        return read_count ? read_count : -EINTR;
@@ -238,7 +234,7 @@ static ssize_t read_printer(struct file * file,
                this_read = (count > sizeof(buf)) ? sizeof(buf) : count;
 
                result = p->pusb_dev->bus->op->bulk_msg(p->pusb_dev,
-                         usb_rcvbulkpipe(p->pusb_dev, endpoint_num),
+                         usb_rcvbulkpipe(p->pusb_dev, p->bulk_in_ep),
                          buf, this_read, &partial, HZ*20);
 
                /* unlike writes, we don't retry a NAK, just stop now */
@@ -266,8 +262,8 @@ static int printer_probe(struct usb_device *dev)
        /*
         * FIXME - this will not cope with combined printer/scanners
         */
-       if ((dev->descriptor.bDeviceClass != 7 &&
-            dev->descriptor.bDeviceClass != 0) ||
+       if ((dev->descriptor.bDeviceClass != USB_CLASS_PRINTER &&
+           dev->descriptor.bDeviceClass != 0) ||
            dev->descriptor.bNumConfigurations != 1 ||
            dev->config[0].bNumInterfaces != 1) {
                return -1;
@@ -275,34 +271,50 @@ static int printer_probe(struct usb_device *dev)
 
        interface = &dev->config[0].interface[0].altsetting[0];
 
-       /* Lets be paranoid (for the moment)*/
-       if (interface->bInterfaceClass != 7 ||
+       /* Let's be paranoid (for the moment). */
+       if (interface->bInterfaceClass != USB_CLASS_PRINTER ||
            interface->bInterfaceSubClass != 1 ||
-           (interface->bInterfaceProtocol != 2 && interface->bInterfaceProtocol != 1)||
+           (interface->bInterfaceProtocol != 2 && interface->bInterfaceProtocol != 1) ||
            interface->bNumEndpoints > 2) {
                return -1;
        }
 
-       if ((interface->endpoint[0].bEndpointAddress & 0xf0) != 0x00 ||
-           interface->endpoint[0].bmAttributes != 0x02 ||
-           (interface->bNumEndpoints > 1 && (
-                   (interface->endpoint[1].bEndpointAddress & 0xf0) != 0x80 ||
-                   interface->endpoint[1].bmAttributes != 0x02))) {
+       /* Does this (these) interface(s) support bulk transfers? */
+       if ((interface->endpoint[0].bmAttributes & USB_ENDPOINT_XFERTYPE_MASK)
+             != USB_ENDPOINT_XFER_BULK) {
                return -1;
        }
+       if ((interface->bNumEndpoints > 1) &&
+             ((interface->endpoint[1].bmAttributes & USB_ENDPOINT_XFERTYPE_MASK)
+             != USB_ENDPOINT_XFER_BULK)) {
+               return -1;
+       }
+
+       /*
+        *  Does this interface have at least one OUT endpoint
+        *  that we can write to: endpoint index 0 or 1?
+        */
+       if ((interface->endpoint[0].bEndpointAddress & USB_ENDPOINT_DIR_MASK)
+             != USB_DIR_OUT &&
+           (interface->bNumEndpoints > 1 &&
+             (interface->endpoint[1].bEndpointAddress & USB_ENDPOINT_DIR_MASK)
+             != USB_DIR_OUT)) {
+               return -1;
+       }
 
        for (i=0; i<MAX_PRINTERS; i++) {
                if (!minor_data[i])
                        break;
        }
        if (i >= MAX_PRINTERS) {
+               printk("No minor table space available for USB Printer\n");
                return -1;
        }
 
        printk(KERN_INFO "USB Printer found at address %d\n", dev->devnum);
 
        if (!(dev->private = kmalloc(sizeof(struct pp_usb_data), GFP_KERNEL))) {
-               printk( KERN_DEBUG "usb_printer: no memory!\n");
+               printk(KERN_DEBUG "usb_printer: no memory!\n");
                return -1;
        }
 
@@ -310,48 +322,63 @@ static int printer_probe(struct usb_device *dev)
        minor_data[i] = PPDATA(dev->private);
        minor_data[i]->minor = i;
        minor_data[i]->pusb_dev = dev;
-       /* The max packet size can't be more than 64 (& will be 64 for
-        * any decent bulk device); this calculation was silly.  -greg
-        * minor_data[i]->maxout = interface->endpoint[0].wMaxPacketSize * 16;
-        */
-       minor_data[i]->maxout = 8192;
-       if (minor_data[i]->maxout > PAGE_SIZE) {
-                minor_data[i]->maxout = PAGE_SIZE;
-       }
-       if (interface->bInterfaceProtocol != 2)
+       minor_data[i]->maxout = (BIG_BUF_SIZE > PAGE_SIZE) ? PAGE_SIZE : BIG_BUF_SIZE;
+       if (interface->bInterfaceProtocol != 2)         /* if not bidirectional */
                minor_data[i]->noinput = 1;
-       else {
-               minor_data[i]->maxin = interface->endpoint[1].wMaxPacketSize;
+
+       minor_data[i]->bulk_out_index =
+               ((interface->endpoint[0].bEndpointAddress & USB_ENDPOINT_DIR_MASK)
+                 == USB_DIR_OUT) ? 0 : 1;
+       minor_data[i]->bulk_in_index = minor_data[i]->noinput ? -1 :
+               (minor_data[i]->bulk_out_index == 0) ? 1 : 0;
+       minor_data[i]->bulk_in_ep = minor_data[i]->noinput ? -1 :
+               interface->endpoint[minor_data[i]->bulk_in_index].bEndpointAddress &
+               USB_ENDPOINT_NUMBER_MASK;
+       minor_data[i]->bulk_out_ep =
+               interface->endpoint[minor_data[i]->bulk_out_index].bEndpointAddress &
+               USB_ENDPOINT_NUMBER_MASK;
+       if (interface->bInterfaceProtocol == 2) {       /* if bidirectional */
+               minor_data[i]->maxin =
+                       interface->endpoint[minor_data[i]->bulk_in_index].wMaxPacketSize;
        }
 
         if (usb_set_configuration(dev, dev->config[0].bConfigurationValue)) {
                printk(KERN_INFO "  Failed usb_set_configuration: printer\n");
                return -1;
        }
+
+       printk(KERN_INFO "USB Printer Summary:\n");
+       printk(KERN_INFO "index=%d, maxout=%d, noinput=%d\n",
+               i, minor_data[i]->maxout, minor_data[i]->noinput);
+       printk(KERN_INFO "bulk_in_ix=%d, bulk_in_ep=%d, bulk_out_ix=%d, bulk_out_ep=%d\n",
+               minor_data[i]->bulk_in_index,
+               minor_data[i]->bulk_in_ep,
+               minor_data[i]->bulk_out_index,
+               minor_data[i]->bulk_out_ep);
+
 #if 0
        {
                __u8 status;
                __u8 ieee_id[64];
-               devrequest dr;
-
-               /* Lets get the device id if possible */
-               dr.requesttype = USB_TYPE_CLASS | USB_RT_INTERFACE | 0x80;
-               dr.request = 0;
-               dr.value = 0;
-               dr.index = 0;
-               dr.length = sizeof(ieee_id) - 1;
-               if (dev->bus->op->control_msg(dev, usb_rcvctrlpipe(dev,0), &dr, ieee_id, sizeof(ieee_id)-1, HZ) == 0) {
+
+               /* Let's get the device id if possible. */
+               if (usb_control_msg(dev, usb_rcvctrlpipe(dev,0),
+                   USB_PRINTER_REQ_GET_DEVICE_ID,
+                   USB_TYPE_CLASS | USB_RT_INTERFACE | USB_DIR_IN,
+                   0, 0, ieee_id,
+                   sizeof(ieee_id)-1, HZ) == 0) {
                        if (ieee_id[1] < sizeof(ieee_id) - 1)
                                ieee_id[ieee_id[1]+2] = '\0';
                        else
                                ieee_id[sizeof(ieee_id)-1] = '\0';
-                       printk(KERN_INFO "  Printer ID is %s\n", &ieee_id[2]);
+                       printk(KERN_INFO "  USB Printer ID is %s\n",
+                               &ieee_id[2]);
                }
                status = printer_read_status(PPDATA(dev->private));
                printk(KERN_INFO "  Status is %s,%s,%s\n",
-                      (status & 0x10) ? "Selected" : "Not Selected",
-                      (status & 0x20) ? "No Paper" : "Paper",
-                      (status & 0x08) ? "No Error" : "Error");
+                      (status & LP_PSELECD) ? "Selected" : "Not Selected",
+                      (status & LP_POUTPA)  ? "No Paper" : "Paper",
+                      (status & LP_PERRORP) ? "No Error" : "Error");
        }
 #endif
        return 0;
@@ -397,7 +424,13 @@ static struct usb_driver printer_driver = {
 
 int usb_printer_init(void)
 {
-       usb_register(&printer_driver);
+       if (usb_register(&printer_driver)) {
+               printk(KERN_ERR "USB Printer driver cannot register: "
+                       "minor number %d already in use\n",
+                       printer_driver.minor);
+               return 1;
+       }
+
        printk(KERN_INFO "USB Printer support registered.\n");
        return 0;
 }
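
printer_probe() now classifies endpoints with the descriptor bitfields instead of hard-coded 0x00/0x80/0x02 comparisons. A standalone sketch of that decoding, with the USB-defined mask values spelled out and a made-up bulk-OUT endpoint descriptor:

#include <stdio.h>

#define USB_ENDPOINT_XFERTYPE_MASK 0x03
#define USB_ENDPOINT_XFER_BULK     0x02
#define USB_ENDPOINT_DIR_MASK      0x80
#define USB_DIR_OUT                0x00
#define USB_ENDPOINT_NUMBER_MASK   0x0f

int main(void)
{
        unsigned char bmAttributes     = 0x02;   /* hypothetical descriptor: bulk transfer */
        unsigned char bEndpointAddress = 0x01;   /* endpoint 1, OUT (direction bit clear) */

        int is_bulk = (bmAttributes & USB_ENDPOINT_XFERTYPE_MASK) == USB_ENDPOINT_XFER_BULK;
        int is_out  = (bEndpointAddress & USB_ENDPOINT_DIR_MASK) == USB_DIR_OUT;
        int ep_num  = bEndpointAddress & USB_ENDPOINT_NUMBER_MASK;

        printf("bulk=%d out=%d endpoint number=%d\n", is_bulk, is_out, ep_num);
        return 0;
}
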
index f3c3f11b7b1cd337b96600e11367fd8dec1c59f4..c065cfdb2acf7cd927e237010a3691093910cbf5 100644 (file)
@@ -97,7 +97,7 @@ static kmem_cache_t *bh_cachep;
 static int grow_buffers(int size);
 
 /* This is used by some architectures to estimate available memory. */
-atomic_t buffermem = ATOMIC_INIT(0);
+atomic_t buffermem_pages = ATOMIC_INIT(0);
 
 /* Here is the parameter block for the bdflush process. If you add or
  * remove any of the parameters, make sure to update kernel/sysctl.c.
@@ -827,7 +827,7 @@ static int balance_dirty_state(kdev_t dev)
        unsigned long dirty, tot, hard_dirty_limit, soft_dirty_limit;
 
        dirty = size_buffers_type[BUF_DIRTY] >> PAGE_SHIFT;
-       tot = nr_lru_pages + nr_free_pages - nr_free_bigpages;
+       tot = nr_lru_pages + nr_free_pages + nr_free_highpages;
        hard_dirty_limit = tot * bdf_prm.b_un.nfract / 100;
        soft_dirty_limit = hard_dirty_limit >> 1;
 
@@ -1267,7 +1267,7 @@ int block_flushpage(struct inode *inode, struct page *page, unsigned long offset
         */
        if (!offset) {
                if (!try_to_free_buffers(page)) {
-                       atomic_add(PAGE_CACHE_SIZE, &buffermem);
+                       atomic_inc(&buffermem_pages);
                        return 0;
                }
        }
@@ -1834,12 +1834,12 @@ int brw_kiovec(int rw, int nr, struct kiobuf *iovec[],
                dprintk ("iobuf %d %d %d\n", offset, length, size);
 
                for (pageind = 0; pageind < iobuf->nr_pages; pageind++) {
-                       page = iobuf->pagelist[pageind];
                        map  = iobuf->maplist[pageind];
-                       if (map && PageBIGMEM(map)) {
+                       if (map && PageHighMem(map)) {
                                err = -EIO;
                                goto error;
                        }
+                       page = page_address(map);
 
                        while (length > 0) {
                                blocknr = b[bufind++];
@@ -2115,7 +2115,7 @@ static int grow_buffers(int size)
        page_map = mem_map + MAP_NR(page);
        page_map->buffers = bh;
        lru_cache_add(page_map);
-       atomic_add(PAGE_SIZE, &buffermem);
+       atomic_inc(&buffermem_pages);
        return 1;
 
 no_buffer_head:
@@ -2208,7 +2208,8 @@ void show_buffers(void)
        int nlist;
        static char *buf_types[NR_LIST] = { "CLEAN", "LOCKED", "DIRTY" };
 
-       printk("Buffer memory:   %6dkB\n", atomic_read(&buffermem) >> 10);
+       printk("Buffer memory:   %6dkB\n",
+                       atomic_read(&buffermem_pages) << (PAGE_SHIFT-10));
 
 #ifdef __SMP__ /* trylock does nothing on UP and so we could deadlock */
        if (!spin_trylock(&lru_list_lock))
@@ -2246,7 +2247,7 @@ void show_buffers(void)
  * Use gfp() for the hash table to decrease TLB misses, use
  * SLAB cache for buffer heads.
  */
-void __init buffer_init(unsigned long memory_size)
+void __init buffer_init(unsigned long mempages)
 {
        int order, i;
        unsigned int nr_hash;
@@ -2254,9 +2255,11 @@ void __init buffer_init(unsigned long memory_size)
        /* The buffer cache hash table is less important these days,
         * trim it a bit.
         */
-       memory_size >>= 14;
-       memory_size *= sizeof(struct buffer_head *);
-       for (order = 0; (PAGE_SIZE << order) < memory_size; order++)
+       mempages >>= 14;
+
+       mempages *= sizeof(struct buffer_head *);
+
+       for (order = 0; (1 << order) < mempages; order++)
                ;
 
        /* try to allocate something until we get it or we're asking
index 5f9c066a45e00b5e98f1816346b737109cfb71f4..b6f7a7203b5bfa4b201139aca548dcee6b7f2886 100644 (file)
@@ -420,7 +420,7 @@ int shrink_dcache_memory(int priority, unsigned int gfp_mask)
                unlock_kernel();
                /* FIXME: kmem_cache_shrink here should tell us
                   the number of pages freed, and it should
-                  work in a __GFP_DMA/__GFP_BIGMEM behaviour
+                  work in a __GFP_DMA/__GFP_HIGHMEM behaviour
                   to free only the interesting pages in
                   function of the needs of the current allocation. */
                kmem_cache_shrink(dentry_cache);
index f56d8b3523e97cc75b87dc592c150c0ae007527a..1961ec33a49a0d0a10b438f8b496a0f0dd1a24ea 100644 (file)
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -31,6 +31,8 @@
 #include <linux/fcntl.h>
 #include <linux/smp_lock.h>
 #include <linux/init.h>
+#include <linux/pagemap.h>
+#include <linux/highmem.h>
 
 #include <asm/uaccess.h>
 #include <asm/pgtable.h>
@@ -212,20 +214,42 @@ int copy_strings(int argc,char ** argv, struct linux_binprm *bprm)
                /* XXX: add architecture specific overflow check here. */ 
 
                pos = bprm->p;
-               while (len>0) {
-                       char *pag;
+               while (len > 0) {
+                       char *kaddr;
+                       int i, new, err;
+                       struct page *page;
                        int offset, bytes_to_copy;
 
                        offset = pos % PAGE_SIZE;
-                       if (!(pag = (char *) bprm->page[pos/PAGE_SIZE]) &&
-                           !(pag = (char *) bprm->page[pos/PAGE_SIZE] =
-                             (unsigned long *) get_free_page(GFP_USER))) 
-                               return -ENOMEM; 
+                       i = pos/PAGE_SIZE;
+                       page = bprm->page[i];
+                       new = 0;
+                       if (!page) {
+                               /*
+                                * Cannot yet use highmem page because
+                                * we cannot sleep with a kmap held.
+                                */
+                               page = __get_pages(GFP_USER, 0);
+                               bprm->page[i] = page;
+                               if (!page)
+                                       return -ENOMEM;
+                               new = 1;
+                       }
+                       kaddr = (char *)kmap(page, KM_WRITE);
 
+                       if (new && offset)
+                               memset(kaddr, 0, offset);
                        bytes_to_copy = PAGE_SIZE - offset;
-                       if (bytes_to_copy > len)
+                       if (bytes_to_copy > len) {
                                bytes_to_copy = len;
-                       if (copy_from_user(pag + offset, str, bytes_to_copy)) 
+                               if (new)
+                                       memset(kaddr+offset+len, 0, PAGE_SIZE-offset-len);
+                       }
+                       err = copy_from_user(kaddr + offset, str, bytes_to_copy);
+                       flush_page_to_ram(kaddr);
+                       kunmap((unsigned long)kaddr, KM_WRITE);
+
+                       if (err)
                                return -EFAULT; 
 
                        pos += bytes_to_copy;
@@ -647,14 +671,22 @@ void remove_arg_zero(struct linux_binprm *bprm)
 {
        if (bprm->argc) {
                unsigned long offset;
-               char * page;
+               char * kaddr;
+               struct page *page;
+
                offset = bprm->p % PAGE_SIZE;
-               page = (char*)bprm->page[bprm->p/PAGE_SIZE];
-               while(bprm->p++,*(page+offset++))
-                       if(offset==PAGE_SIZE){
-                               offset=0;
-                               page = (char*)bprm->page[bprm->p/PAGE_SIZE];
-                       }
+               goto inside;
+
+               while (bprm->p++, *(kaddr+offset++)) {
+                       if (offset != PAGE_SIZE)
+                               continue;
+                       offset = 0;
+                       kunmap((unsigned long)kaddr, KM_WRITE);
+inside:
+                       page = bprm->page[bprm->p/PAGE_SIZE];
+                       kaddr = (char *)kmap(page, KM_WRITE);
+               }
+               kunmap((unsigned long)kaddr, KM_WRITE);
                bprm->argc--;
        }
 }
@@ -683,8 +715,8 @@ int search_binary_handler(struct linux_binprm *bprm,struct pt_regs *regs)
                bprm->dentry = NULL;
 
                bprm_loader.p = PAGE_SIZE*MAX_ARG_PAGES-sizeof(void *);
-               for (i=0 ; i<MAX_ARG_PAGES ; i++)       /* clear page-table */
-                    bprm_loader.page[i] = 0;
+               for (i = 0 ; i < MAX_ARG_PAGES ; i++)   /* clear page-table */
+                    bprm_loader.page[i] = NULL;
 
                dentry = open_namei(dynloader[0], 0, 0);
                retval = PTR_ERR(dentry);
@@ -800,8 +832,9 @@ out:
 
        /* Assumes that free_page() can take a NULL argument. */ 
        /* I hope this is ok for all architectures */ 
-       for (i=0 ; i<MAX_ARG_PAGES ; i++)
-               free_page(bprm.page[i]);
+       for (i = 0 ; i < MAX_ARG_PAGES ; i++)
+               if (bprm.page[i])
+                       __free_page(bprm.page[i]);
 
        return retval;
 }
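
copy_strings() and remove_arg_zero() now bracket every access to an argument page with kmap()/kunmap(): a page that may live in high memory has no permanent kernel virtual address and is only mapped into a small window of fixed slots while it is being touched. A toy model of that idea (the real kmap bookkeeping is different; this only illustrates the map/use/unmap discipline):

#include <stdio.h>

#define KMAP_SLOTS 4

static long slot_to_page[KMAP_SLOTS] = { -1, -1, -1, -1 };   /* -1: slot free */

static int toy_kmap(long page)
{
        int i;

        for (i = 0; i < KMAP_SLOTS; i++)
                if (slot_to_page[i] < 0) {
                        slot_to_page[i] = page;   /* "install" a temporary mapping */
                        return i;
                }
        return -1;                                /* window exhausted */
}

static void toy_kunmap(int slot)
{
        slot_to_page[slot] = -1;                  /* tear the mapping down again */
}

int main(void)
{
        int slot = toy_kmap(1234);                /* map a hypothetical page */

        printf("page 1234 reachable through slot %d\n", slot);
        toy_kunmap(slot);                         /* release before doing anything that sleeps */
        return 0;
}
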
index fd33dc8b8482f64bfa1dd365783772a94a239460..d62fb3ef32d552a912e5e956f377796c0664adfb 100644 (file)
--- a/fs/file.c
+++ b/fs/file.c
@@ -16,7 +16,7 @@
 
 
 /*
- * Allocate an fd array, using get_free_page() if possible.
+ * Allocate an fd array, using __get_free_page() if possible.
  * Note: the array isn't cleared at allocation time.
  */
 struct file ** alloc_fd_array(int num)
@@ -129,7 +129,7 @@ out:
 }
 
 /*
- * Allocate an fdset array, using get_free_page() if possible.
+ * Allocate an fdset array, using __get_free_page() if possible.
  * Note: the array isn't cleared at allocation time.
  */
 fd_set * alloc_fdset(int num)
index 337770ed04f633f22f394f52d25bb18559a5b562..f03295d5c1af3942865f9a66c570845c0235ff79 100644 (file)
@@ -89,6 +89,7 @@ static void init_once(void * foo, kmem_cache_t * cachep, unsigned long flags)
                memset(inode, 0, sizeof(*inode));
                init_waitqueue_head(&inode->i_wait);
                INIT_LIST_HEAD(&inode->i_hash);
+               INIT_LIST_HEAD(&inode->i_pages);
                INIT_LIST_HEAD(&inode->i_dentry);
                sema_init(&inode->i_sem, 1);
                spin_lock_init(&inode->i_shared_lock);
@@ -401,7 +402,7 @@ int shrink_icache_memory(int priority, int gfp_mask)
                prune_icache(count);
                /* FIXME: kmem_cache_shrink here should tell us
                   the number of pages freed, and it should
-                  work in a __GFP_DMA/__GFP_BIGMEM behaviour
+                  work in a __GFP_DMA/__GFP_HIGHMEM behaviour
                   to free only the interesting pages in
                   function of the needs of the current allocation. */
                kmem_cache_shrink(inode_cachep);
index b46a13bfd8eb62aea940a2e7f6e68ce16d785834..eaabf2f7c5c0de692791994d0bc3aa2d391c7b34 100644 (file)
@@ -50,7 +50,6 @@ int alloc_kiovec(int nr, struct kiobuf **bufp)
                init_waitqueue_head(&iobuf->wait_queue);
                iobuf->end_io = simple_wakeup_kiobuf;
                iobuf->array_len = KIO_STATIC_PAGES;
-               iobuf->pagelist  = iobuf->page_array;
                iobuf->maplist   = iobuf->map_array;
                *bufp++ = iobuf;
        }
@@ -65,50 +64,35 @@ void free_kiovec(int nr, struct kiobuf **bufp)
        
        for (i = 0; i < nr; i++) {
                iobuf = bufp[i];
-               if (iobuf->array_len > KIO_STATIC_PAGES) {
-                       kfree (iobuf->pagelist);
+               if (iobuf->array_len > KIO_STATIC_PAGES)
                        kfree (iobuf->maplist);
-               }
                kmem_cache_free(kiobuf_cachep, bufp[i]);
        }
 }
 
 int expand_kiobuf(struct kiobuf *iobuf, int wanted)
 {
-       unsigned long * pagelist;
        struct page ** maplist;
        
        if (iobuf->array_len >= wanted)
                return 0;
        
-       pagelist = (unsigned long *) 
-               kmalloc(wanted * sizeof(unsigned long), GFP_KERNEL);
-       if (!pagelist)
-               return -ENOMEM;
-       
        maplist = (struct page **) 
                kmalloc(wanted * sizeof(struct page **), GFP_KERNEL);
-       if (!maplist) {
-               kfree(pagelist);
+       if (!maplist)
                return -ENOMEM;
-       }
 
        /* Did it grow while we waited? */
        if (iobuf->array_len >= wanted) {
-               kfree(pagelist);
                kfree(maplist);
                return 0;
        }
        
-       memcpy (pagelist, iobuf->pagelist, wanted * sizeof(unsigned long));
        memcpy (maplist,  iobuf->maplist,   wanted * sizeof(struct page **));
 
-       if (iobuf->array_len > KIO_STATIC_PAGES) {
-               kfree (iobuf->pagelist);
+       if (iobuf->array_len > KIO_STATIC_PAGES)
                kfree (iobuf->maplist);
-       }
        
-       iobuf->pagelist  = pagelist;
        iobuf->maplist   = maplist;
        iobuf->array_len = wanted;
        return 0;
index 6515e0d523c95ee660c97acca0c23e0a209d487f..b7ec225ac8b76f5f4bc587a0bc745a2ba76e0f21 100644 (file)
@@ -308,8 +308,7 @@ static struct page *try_to_get_dirent_page(struct file *file, __u32 cookie, int
        struct nfs_readdirres rd_res;
        struct dentry *dentry = file->f_dentry;
        struct inode *inode = dentry->d_inode;
-       struct page *page, **hash;
-       unsigned long page_cache;
+       struct page *page, **hash, *page_cache;
        long offset;
        __u32 *cookiep;
 
@@ -341,14 +340,14 @@ repeat:
                goto unlock_out;
        }
 
-       page = page_cache_entry(page_cache);
+       page = page_cache;
        if (add_to_page_cache_unique(page, inode, offset, hash)) {
                page_cache_release(page);
                goto repeat;
        }
 
        rd_args.fh = NFS_FH(dentry);
-       rd_res.buffer = (char *)page_cache;
+       rd_res.buffer = (char *)page_address(page_cache);
        rd_res.bufsiz = PAGE_CACHE_SIZE;
        rd_res.cookie = *cookiep;
        do {
index 6cd892740597e2b13d9f4e52c616ce88d6db30e2..6b0d0f05b904e66e49e6a110e826be0a417c081a 100644 (file)
@@ -59,8 +59,7 @@ struct inode_operations nfs_symlink_inode_operations = {
 static struct page *try_to_get_symlink_page(struct dentry *dentry, struct inode *inode)
 {
        struct nfs_readlinkargs rl_args;
-       struct page *page, **hash;
-       unsigned long page_cache;
+       struct page *page, **hash, *page_cache;
 
        page = NULL;
        page_cache = page_cache_alloc();
@@ -75,7 +74,7 @@ repeat:
                goto unlock_out;
        }
 
-       page = page_cache_entry(page_cache);
+       page = page_cache;
        if (add_to_page_cache_unique(page, inode, 0, hash)) {
                page_cache_release(page);
                goto repeat;
@@ -86,7 +85,7 @@ repeat:
         * XDR response verification will NULL terminate it.
         */
        rl_args.fh = NFS_FH(dentry);
-       rl_args.buffer = (const void *)page_cache;
+       rl_args.buffer = (const void *)page_address(page_cache);
        if (rpc_call(NFS_CLIENT(inode), NFSPROC_READLINK,
                     &rl_args, NULL, 0) < 0)
                goto error;
index f4cd01a7c4e92e5f488097df12e7ddd5f445f154..101a0ad9c0bd1bc59440cd15e6213026bb3309c0 100644 (file)
@@ -386,8 +386,8 @@ static int get_meminfo(char * buffer)
                i.sharedram >> 10,
                i.bufferram >> 10,
                atomic_read(&page_cache_size) << (PAGE_SHIFT - 10),
-               i.totalbig >> 10,
-               i.freebig >> 10,
+               i.totalhigh >> 10,
+               i.freehigh >> 10,
                i.totalswap >> 10,
                i.freeswap >> 10);
 }
@@ -407,7 +407,7 @@ static int get_cmdline(char * buffer)
        return sprintf(buffer, "%s\n", saved_command_line);
 }
 
-static unsigned long get_phys_addr(struct mm_struct * mm, unsigned long ptr)
+static struct page * get_phys_page(struct mm_struct * mm, unsigned long ptr)
 {
        pgd_t *page_dir;
        pmd_t *page_middle;
@@ -434,41 +434,41 @@ static unsigned long get_phys_addr(struct mm_struct * mm, unsigned long ptr)
        pte = *pte_offset(page_middle,ptr);
        if (!pte_present(pte))
                return 0;
-       return pte_page(pte) + (ptr & ~PAGE_MASK);
+       return pte_page(pte);
 }
 
-#include <linux/bigmem.h>
+#include <linux/highmem.h>
 
 static int get_array(struct mm_struct *mm, unsigned long start, unsigned long end, char * buffer)
 {
        unsigned long addr;
        int size = 0, result = 0;
-       char c;
+       char *buf, c;
 
        if (start >= end)
                return result;
        for (;;) {
-               addr = get_phys_addr(mm, start);
-               if (!addr)
+               struct page *page = get_phys_page(mm, start);
+               if (!page)
                        return result;
-               addr = kmap(addr, KM_READ);
+               addr = kmap(page, KM_READ);
+               buf = (char *) (addr + (start & ~PAGE_MASK));
                do {
-                       c = *(char *) addr;
+                       c = *buf;
                        if (!c)
                                result = size;
-                       if (size < PAGE_SIZE)
-                               buffer[size++] = c;
-                       else {
+                       if (size >= PAGE_SIZE) {
                                kunmap(addr, KM_READ);
                                return result;
                        }
-                       addr++;
+                       buffer[size++] = c;
+                       buf++;
                        start++;
                        if (!c && start >= end) {
                                kunmap(addr, KM_READ);
                                return result;
                        }
-               } while (addr & ~PAGE_MASK);
+               } while (~PAGE_MASK & (unsigned long)buf);
                kunmap(addr, KM_READ);
        }
        return result;
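
The rewritten loop above is one instance of the pattern this patch introduces throughout: look up a struct page instead of a physical address, kmap() it for the duration of the access, walk a byte pointer bounded by the page, then kunmap(). Condensed into a stand-alone sketch (hypothetical helper, using the kmap(page, type) signature added by this patch):

        #include <linux/mm.h>
        #include <linux/highmem.h>

        /* Copy a NUL-terminated string out of one (possibly highmem) page,
         * starting at user virtual address 'start', into 'out' (max bytes). */
        static int copy_string_from_page(struct page *page, unsigned long start,
                                         char *out, int max)
        {
                unsigned long vaddr;
                char *src;
                int n = 0;

                if (max <= 0)
                        return 0;
                vaddr = kmap(page, KM_READ);
                src = (char *)(vaddr + (start & ~PAGE_MASK));
                do {
                        char c = *src++;

                        out[n++] = c;
                        if (!c)
                                break;
                } while (n < max && ((unsigned long)src & ~PAGE_MASK));
                kunmap(vaddr, KM_READ);
                return n;
        }
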
index f9fcb0970cc41e56ac2f8df6b1e3f6b2c6cfb139..90cd797224dfc3aa9a49e9f4540a68ed3b387fe3 100644 (file)
@@ -10,7 +10,7 @@
 #include <linux/kernel.h>
 #include <linux/mm.h>
 #include <linux/proc_fs.h>
-#include <linux/bigmem.h>
+#include <linux/highmem.h>
 
 #include <asm/page.h>
 #include <asm/uaccess.h>
@@ -79,9 +79,10 @@ static ssize_t mem_read(struct file * file, char * buf,
        pgd_t *page_dir;
        pmd_t *page_middle;
        pte_t pte;
-       char * page;
+       struct page * page;
        struct task_struct * tsk;
        unsigned long addr;
+       unsigned long maddr; /* temporary mapped address */
        char *tmp;
        ssize_t scount, i;
 
@@ -102,7 +103,7 @@ static ssize_t mem_read(struct file * file, char * buf,
                if (pgd_none(*page_dir))
                        break;
                if (pgd_bad(*page_dir)) {
-                       printk("Bad page dir entry %08lx\n", pgd_val(*page_dir));
+                       pgd_ERROR(*page_dir);
                        pgd_clear(page_dir);
                        break;
                }
@@ -110,20 +111,20 @@ static ssize_t mem_read(struct file * file, char * buf,
                if (pmd_none(*page_middle))
                        break;
                if (pmd_bad(*page_middle)) {
-                       printk("Bad page middle entry %08lx\n", pmd_val(*page_middle));
+                       pmd_ERROR(*page_middle);
                        pmd_clear(page_middle);
                        break;
                }
                pte = *pte_offset(page_middle,addr);
                if (!pte_present(pte))
                        break;
-               page = (char *) pte_page(pte) + (addr & ~PAGE_MASK);
+               page = pte_page(pte);
                i = PAGE_SIZE-(addr & ~PAGE_MASK);
                if (i > scount)
                        i = scount;
-               page = (char *) kmap((unsigned long) page, KM_READ);
-               copy_to_user(tmp, page, i);
-               kunmap((unsigned long) page, KM_READ);
+               maddr = kmap(page, KM_READ);
+               copy_to_user(tmp, (char *)maddr + (addr & ~PAGE_MASK), i);
+               kunmap(maddr, KM_READ);
                addr += i;
                tmp += i;
                scount -= i;
@@ -141,9 +142,10 @@ static ssize_t mem_write(struct file * file, char * buf,
        pgd_t *page_dir;
        pmd_t *page_middle;
        pte_t pte;
-       char * page;
+       struct page * page;
        struct task_struct * tsk;
        unsigned long addr;
+       unsigned long maddr; /* temporary mapped address */
        char *tmp;
        long i;
 
@@ -159,7 +161,7 @@ static ssize_t mem_write(struct file * file, char * buf,
                if (pgd_none(*page_dir))
                        break;
                if (pgd_bad(*page_dir)) {
-                       printk("Bad page dir entry %08lx\n", pgd_val(*page_dir));
+                       pgd_ERROR(*page_dir);
                        pgd_clear(page_dir);
                        break;
                }
@@ -167,7 +169,7 @@ static ssize_t mem_write(struct file * file, char * buf,
                if (pmd_none(*page_middle))
                        break;
                if (pmd_bad(*page_middle)) {
-                       printk("Bad page middle entry %08lx\n", pmd_val(*page_middle));
+                       pmd_ERROR(*page_middle);
                        pmd_clear(page_middle);
                        break;
                }
@@ -176,13 +178,13 @@ static ssize_t mem_write(struct file * file, char * buf,
                        break;
                if (!pte_write(pte))
                        break;
-               page = (char *) pte_page(pte) + (addr & ~PAGE_MASK);
+               page = pte_page(pte);
                i = PAGE_SIZE-(addr & ~PAGE_MASK);
                if (i > count)
                        i = count;
-               page = (unsigned long) kmap((unsigned long) page, KM_WRITE);
-               copy_from_user(page, tmp, i);
-               kunmap((unsigned long) page, KM_WRITE);
+               maddr = kmap(page, KM_WRITE);
+               copy_from_user((char *)maddr + (addr & ~PAGE_MASK), tmp, i);
+               kunmap(maddr, KM_WRITE);
                addr += i;
                tmp += i;
                count -= i;
@@ -248,14 +250,14 @@ int mem_mmap(struct file * file, struct vm_area_struct * vma)
                if (pgd_none(*src_dir))
                        return -EINVAL;
                if (pgd_bad(*src_dir)) {
-                       printk("Bad source page dir entry %08lx\n", pgd_val(*src_dir));
+                       pgd_ERROR(*src_dir);
                        return -EINVAL;
                }
                src_middle = pmd_offset(src_dir, stmp);
                if (pmd_none(*src_middle))
                        return -EINVAL;
                if (pmd_bad(*src_middle)) {
-                       printk("Bad source page middle entry %08lx\n", pmd_val(*src_middle));
+                       pmd_ERROR(*src_middle);
                        return -EINVAL;
                }
                src_table = pte_offset(src_middle, stmp);
@@ -301,9 +303,9 @@ int mem_mmap(struct file * file, struct vm_area_struct * vma)
 
                set_pte(src_table, pte_mkdirty(*src_table));
                set_pte(dest_table, *src_table);
-               mapnr = MAP_NR(pte_page(*src_table));
+               mapnr = pte_pagenr(*src_table);
                if (mapnr < max_mapnr)
-                       get_page(mem_map + MAP_NR(pte_page(*src_table)));
+                       get_page(mem_map + pte_pagenr(*src_table));
 
                stmp += PAGE_SIZE;
                dtmp += PAGE_SIZE;
diff --git a/include/asm-i386/bigmem.h b/include/asm-i386/bigmem.h
deleted file mode 100644 (file)
index 1c5c4cf..0000000
+++ /dev/null
@@ -1,69 +0,0 @@
-/*
- * bigmem.h:   virtual kernel memory mappings for big memory
- *
- * Used in CONFIG_BIGMEM systems for memory pages which        are not
- * addressable by direct kernel virtual adresses.
- *
- * Copyright (C) 1999 Gerhard Wichert, Siemens AG
- *                   Gerhard.Wichert@pdb.siemens.de
- */
-
-#ifndef _ASM_BIGMEM_H
-#define _ASM_BIGMEM_H
-
-#include <linux/init.h>
-
-#define BIGMEM_DEBUG /* undef for production */
-
-/* declarations for bigmem.c */
-extern unsigned long bigmem_start, bigmem_end;
-extern int nr_free_bigpages;
-
-extern pte_t *kmap_pte;
-extern pgprot_t kmap_prot;
-
-extern void kmap_init(void) __init;
-
-/* kmap helper functions necessary to access the bigmem pages in kernel */
-#include <asm/pgtable.h>
-#include <asm/kmap_types.h>
-
-extern inline unsigned long kmap(unsigned long kaddr, enum km_type type)
-{
-       if (__pa(kaddr) < bigmem_start)
-               return kaddr;
-       {
-               enum fixed_addresses idx = type+KM_TYPE_NR*smp_processor_id();
-               unsigned long vaddr = __fix_to_virt(FIX_KMAP_BEGIN+idx);
-
-#ifdef BIGMEM_DEBUG
-               if (!pte_none(*(kmap_pte-idx)))
-               {
-                       __label__ here;
-               here:
-                       printk(KERN_ERR "not null pte on CPU %d from %p\n",
-                              smp_processor_id(), &&here);
-               }
-#endif
-               set_pte(kmap_pte-idx, mk_pte(kaddr & PAGE_MASK, kmap_prot));
-               __flush_tlb_one(vaddr);
-
-               return vaddr | (kaddr & ~PAGE_MASK);
-       }
-}
-
-extern inline void kunmap(unsigned long vaddr, enum km_type type)
-{
-#ifdef BIGMEM_DEBUG
-       enum fixed_addresses idx = type+KM_TYPE_NR*smp_processor_id();
-       if ((vaddr & PAGE_MASK) == __fix_to_virt(FIX_KMAP_BEGIN+idx))
-       {
-               /* force other mappings to Oops if they'll try to access
-                  this pte without first remap it */
-               pte_clear(kmap_pte-idx);
-               __flush_tlb_one(vaddr);
-       }
-#endif
-}
-
-#endif /* _ASM_BIGMEM_H */
index 1914385eb940b18630c783b17111d3779c21508d..4ae25be50754bb6ed7cea5434f97438c62dc61c5 100644 (file)
@@ -236,6 +236,7 @@ static void __init check_amd_k6(void)
  * have the F0 0F bug, which lets nonpriviledged users lock up the system:
  */
 
+#ifndef CONFIG_M686
 extern void trap_init_f00f_bug(void);
 
 static void __init check_pentium_f00f(void)
@@ -250,6 +251,7 @@ static void __init check_pentium_f00f(void)
                trap_init_f00f_bug();
        }
 }
+#endif
 
 /*
  * Perform the Cyrix 5/2 test. A Cyrix won't change
@@ -424,7 +426,9 @@ static void __init check_bugs(void)
        check_hlt();
        check_popad();
        check_amd_k6();
+#ifndef CONFIG_M686
        check_pentium_f00f();
+#endif
        check_cyrix_coma();
        system_utsname.machine[1] = '0' + boot_cpu_data.x86;
 }
index 34c82dbe01d08833afef179d7e694928b252f1f0..01f6a18714216743e95ec60d6745e730d9ddd9c6 100644 (file)
@@ -17,7 +17,7 @@
 #include <linux/kernel.h>
 #include <asm/apic.h>
 #include <asm/page.h>
-#ifdef CONFIG_BIGMEM
+#ifdef CONFIG_HIGHMEM
 #include <linux/threads.h>
 #include <asm/kmap_types.h>
 #endif
@@ -34,7 +34,7 @@
  *
  * these 'compile-time allocated' memory buffers are
  * fixed-size 4k pages. (or larger if used with an increment
- * bigger than 1) use fixmap_set(idx,phys) to associate
+ * higher than 1) use fixmap_set(idx,phys) to associate
+ * higher than 1) use fixmap_set(idx,phys) to associate
  * physical memory with fixmap indices.
  *
  * TLB entries of such buffers will not be flushed across
@@ -61,7 +61,7 @@ enum fixed_addresses {
        FIX_LI_PCIA,    /* Lithium PCI Bridge A */
        FIX_LI_PCIB,    /* Lithium PCI Bridge B */
 #endif
-#ifdef CONFIG_BIGMEM
+#ifdef CONFIG_HIGHMEM
        FIX_KMAP_BEGIN, /* reserved pte's for temporary kernel mappings */
        FIX_KMAP_END = FIX_KMAP_BEGIN+(KM_TYPE_NR*NR_CPUS)-1,
 #endif
diff --git a/include/asm-i386/highmem.h b/include/asm-i386/highmem.h
new file mode 100644 (file)
index 0000000..bd5564a
--- /dev/null
@@ -0,0 +1,85 @@
+/*
+ * highmem.h: virtual kernel memory mappings for high memory
+ *
+ * Used in CONFIG_HIGHMEM systems for memory pages which
+ * are not addressable by direct kernel virtual addresses.
+ *
+ * Copyright (C) 1999 Gerhard Wichert, Siemens AG
+ *                   Gerhard.Wichert@pdb.siemens.de
+ *
+ *
+ * Redesigned the x86 32-bit VM architecture to deal with 
+ * up to 16 Terabytes of physical memory. With current x86 CPUs
+ * we now support up to 64 Gigabytes physical RAM.
+ *
+ * Copyright (C) 1999 Ingo Molnar <mingo@redhat.com>
+ */
+
+#ifndef _ASM_HIGHMEM_H
+#define _ASM_HIGHMEM_H
+
+#include <linux/init.h>
+
+/* undef for production */
+#define HIGHMEM_DEBUG 1
+
+/* declarations for highmem.c */
+extern unsigned long highstart_pfn, highend_pfn;
+
+extern pte_t *kmap_pte;
+extern pgprot_t kmap_prot;
+
+extern void kmap_init(void) __init;
+
+/* kmap helper functions necessary to access the highmem pages in kernel */
+#include <asm/pgtable.h>
+#include <asm/kmap_types.h>
+
+extern inline unsigned long kmap(struct page *page, enum km_type type)
+{
+       if (page < highmem_start_page)
+               return page_address(page);
+       {
+               enum fixed_addresses idx = type+KM_TYPE_NR*smp_processor_id();
+               unsigned long vaddr = __fix_to_virt(FIX_KMAP_BEGIN+idx);
+
+#if HIGHMEM_DEBUG
+               if (!pte_none(*(kmap_pte-idx)))
+               {
+                       __label__ here;
+               here:
+                       printk(KERN_ERR "not null pte on CPU %d from %p\n",
+                              smp_processor_id(), &&here);
+               }
+#endif
+               set_pte(kmap_pte-idx, mk_pte(page, kmap_prot));
+               __flush_tlb_one(vaddr);
+
+               return vaddr;
+       }
+}
+
+extern inline void kunmap(unsigned long vaddr, enum km_type type)
+{
+#if HIGHMEM_DEBUG
+       enum fixed_addresses idx = type+KM_TYPE_NR*smp_processor_id();
+       if ((vaddr & PAGE_MASK) == __fix_to_virt(FIX_KMAP_BEGIN+idx))
+       {
+               /* force other mappings to Oops if they try to access
+                  this pte without first remapping it */
+               pte_clear(kmap_pte-idx);
+               __flush_tlb_one(vaddr);
+       }
+#endif
+}
+
+extern inline void kmap_check(void)
+{
+#if HIGHMEM_DEBUG
+       int idx_base = KM_TYPE_NR*smp_processor_id(), i;
+       for (i = idx_base; i < idx_base+KM_TYPE_NR; i++)
+               if (!pte_none(*(kmap_pte-i)))
+                       BUG();
+#endif
+}
+#endif /* _ASM_HIGHMEM_H */
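
As a usage sketch (not part of the commit): a caller that must touch a page which may live in high memory brackets the access with one of the per-CPU, per-type kmap slots defined above. The helper below is invented for illustration; for low pages kmap() simply returns the direct-mapped address and kunmap() is effectively a no-op.

        #include <linux/mm.h>
        #include <linux/string.h>
        #include <asm/highmem.h>

        static void zero_possibly_high_page(struct page *page)
        {
                unsigned long vaddr = kmap(page, KM_WRITE);

                memset((void *)vaddr, 0, PAGE_SIZE);
                kunmap(vaddr, KM_WRITE);
        }

The generic clear_highpage() helper added to <linux/highmem.h> further down in this patch is essentially this function.
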
index 906fca475d8484f7b829af2f7fba288a162ffc29..95cbb151df768e6e21cc478b1f5a15e37759257b 100644 (file)
@@ -103,28 +103,27 @@ __OUTS(l)
 #include <linux/vmalloc.h>
 #include <asm/page.h>
 
-#define __io_virt(x)           ((void *)(PAGE_OFFSET | (unsigned long)(x)))
-#define __io_phys(x)           ((unsigned long)(x) & ~PAGE_OFFSET)
+/*
+ * Temporary debugging check to catch old code using
+ * unmapped ISA addresses. Will be removed in 2.4.
+ */
+#define __io_virt(x) ((unsigned long)(x) < PAGE_OFFSET ? \
+       ({ __label__ __l; __l: printk("io mapaddr %p not valid at %p!\n", (char *)(x), &&__l); __va(x); }) : (char *)(x))
+#define __io_phys(x) ((unsigned long)(x) < PAGE_OFFSET ? \
+       ({ __label__ __l; __l: printk("io mapaddr %p not valid at %p!\n", (char *)(x), &&__l); (unsigned long)(x); }) : __pa(x))
+
 /*
  * Change virtual addresses to physical addresses and vv.
  * These are pretty trivial
  */
 extern inline unsigned long virt_to_phys(volatile void * address)
 {
-#ifdef CONFIG_BIGMEM
        return __pa(address);
-#else
-       return __io_phys(address);
-#endif
 }
 
 extern inline void * phys_to_virt(unsigned long address)
 {
-#ifdef CONFIG_BIGMEM
        return __va(address);
-#else
-       return __io_virt(address);
-#endif
 }
 
 extern void * __ioremap(unsigned long offset, unsigned long size, unsigned long flags);
@@ -177,6 +176,23 @@ extern void iounmap(void *addr);
 #define memcpy_fromio(a,b,c)   memcpy((a),__io_virt(b),(c))
 #define memcpy_toio(a,b,c)     memcpy(__io_virt(a),(b),(c))
 
+/*
+ * ISA space is 'always mapped' on a typical x86 system, no need to
+ * explicitly ioremap() it. The fact that the ISA IO space is mapped
+ * to PAGE_OFFSET is pure coincidence - it does not mean ISA values
+ * are physical addresses. The following constant pointer can be
+ * used as the IO-area pointer (it can be iounmapped as well, so the
+ * analogy with PCI is quite large):
+ */
+#define __ISA_IO_base ((char *)(PAGE_OFFSET))
+
+#define isa_readb(a) readb(__ISA_IO_base + (a))
+#define isa_readw(a) readw(__ISA_IO_base + (a))
+#define isa_readl(a) readl(__ISA_IO_base + (a))
+#define isa_writeb(b,a) writeb(b,__ISA_IO_base + (a))
+#define isa_writew(w,a) writew(w,__ISA_IO_base + (a))
+#define isa_writel(l,a) writel(l,__ISA_IO_base + (a))
+
 /*
  * Again, i386 does not require mem IO specific function.
  */
index 1eb4ac09368881d3c556e37786beed34a722d6fc..11577168af04335394ab1fbfca8f7cc812f77738 100644 (file)
@@ -9,8 +9,6 @@
 #ifdef __KERNEL__
 #ifndef __ASSEMBLY__
 
-#define STRICT_MM_TYPECHECKS
-
 #include <linux/config.h>
 
 #ifdef CONFIG_X86_USE_3DNOW
 
 #endif
 
-#ifdef STRICT_MM_TYPECHECKS
 /*
  * These are used to make use of C type-checking..
  */
+#if CONFIG_X86_PAE
+typedef struct { unsigned long long pte; } pte_t;
+typedef struct { unsigned long long pmd; } pmd_t;
+typedef struct { unsigned long long pgd; } pgd_t;
+#else
 typedef struct { unsigned long pte; } pte_t;
 typedef struct { unsigned long pmd; } pmd_t;
 typedef struct { unsigned long pgd; } pgd_t;
+#endif
+
 typedef struct { unsigned long pgprot; } pgprot_t;
 
 #define pte_val(x)     ((x).pte)
@@ -51,26 +55,6 @@ typedef struct { unsigned long pgprot; } pgprot_t;
 #define __pgd(x)       ((pgd_t) { (x) } )
 #define __pgprot(x)    ((pgprot_t) { (x) } )
 
-#else
-/*
- * .. while these make it easier on the compiler
- */
-typedef unsigned long pte_t;
-typedef unsigned long pmd_t;
-typedef unsigned long pgd_t;
-typedef unsigned long pgprot_t;
-
-#define pte_val(x)     (x)
-#define pmd_val(x)     (x)
-#define pgd_val(x)     (x)
-#define pgprot_val(x)  (x)
-
-#define __pte(x)       (x)
-#define __pmd(x)       (x)
-#define __pgd(x)       (x)
-#define __pgprot(x)    (x)
-
-#endif
 #endif /* !__ASSEMBLY__ */
 
 /* to align the pointer to the (next) page boundary */
@@ -93,8 +77,16 @@ typedef unsigned long pgprot_t;
 
 #ifndef __ASSEMBLY__
 
+extern int console_loglevel;
+
+/*
+ * Tell the user there is some problem. Beep too, so we can
+ * see^H^H^Hhear bugs in early bootup as well!
+ */
 #define BUG() do { \
+       __asm__ __volatile__ ("movb $0x3,%al; outb %al,$0x61"); \
        printk("kernel BUG at %s:%d!\n", __FILE__, __LINE__); \
+       console_loglevel = 0; \
        __asm__ __volatile__(".byte 0x0f,0x0b"); \
 } while (0)
 
diff --git a/include/asm-i386/pgtable-2level.h b/include/asm-i386/pgtable-2level.h
new file mode 100644 (file)
index 0000000..a8a0523
--- /dev/null
@@ -0,0 +1,62 @@
+#ifndef _I386_PGTABLE_2LEVEL_H
+#define _I386_PGTABLE_2LEVEL_H
+
+/*
+ * traditional i386 two-level paging structure:
+ */
+
+#define PGDIR_SHIFT    22
+#define PTRS_PER_PGD   1024
+
+/*
+ * the i386 is two-level, so we don't really have any
+ * PMD directory physically.
+ */
+#define PMD_SHIFT      22
+#define PTRS_PER_PMD   1
+
+#define PTRS_PER_PTE   1024
+
+#define pte_ERROR(e) \
+       printk("%s:%d: bad pte %08lx.\n", __FILE__, __LINE__, pte_val(e))
+#define pmd_ERROR(e) \
+       printk("%s:%d: bad pmd %08lx.\n", __FILE__, __LINE__, pmd_val(e))
+#define pgd_ERROR(e) \
+       printk("%s:%d: bad pgd %08lx.\n", __FILE__, __LINE__, pgd_val(e))
+
+/*
+ * The "pgd_xxx()" functions here are trivial for a folded two-level
+ * setup: the pgd is never bad, and a pmd always exists (as it's folded
+ * into the pgd entry)
+ */
+extern inline int pgd_none(pgd_t pgd)          { return 0; }
+extern inline int pgd_bad(pgd_t pgd)           { return 0; }
+extern inline int pgd_present(pgd_t pgd)       { return 1; }
+#define pgd_clear(xp)  do { pgd_val(*(xp)) = 0; } while (0)
+
+#define pgd_page(pgd) \
+((unsigned long) __va(pgd_val(pgd) & PAGE_MASK))
+
+extern inline pmd_t * pmd_offset(pgd_t * dir, unsigned long address)
+{
+       return (pmd_t *) dir;
+}
+
+extern __inline__ pmd_t *get_pmd_fast(void)
+{
+       return (pmd_t *)0;
+}
+
+extern __inline__ void free_pmd_fast(pmd_t *pmd) { }
+extern __inline__ void free_pmd_slow(pmd_t *pmd) { }
+
+extern inline pmd_t * pmd_alloc(pgd_t *pgd, unsigned long address)
+{
+       if (!pgd)
+               BUG();
+       return (pmd_t *) pgd;
+}
+
+#define SWP_ENTRY(type,offset) __pte((((type) << 1) | ((offset) << 8)))
+
+#endif /* _I386_PGTABLE_2LEVEL_H */
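
For orientation, the constants above split a 32-bit virtual address 10/10/12: ten bits of pgd index, ten bits of pte index (the pmd is folded away), and a 12-bit page offset. A stand-alone illustration of that arithmetic, independent of the kernel headers:

        #include <stdio.h>

        #define PAGE_SHIFT   12
        #define PGDIR_SHIFT  22
        #define PTRS_PER_PGD 1024
        #define PTRS_PER_PTE 1024

        int main(void)
        {
                unsigned long addr = 0xc0123456UL;
                unsigned long pgd_idx = (addr >> PGDIR_SHIFT) & (PTRS_PER_PGD - 1);
                unsigned long pte_idx = (addr >> PAGE_SHIFT) & (PTRS_PER_PTE - 1);
                unsigned long off     = addr & ((1UL << PAGE_SHIFT) - 1);

                /* prints: pgd 768, pte 291, offset 0x456 */
                printf("pgd %lu, pte %lu, offset 0x%lx\n", pgd_idx, pte_idx, off);
                return 0;
        }
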
diff --git a/include/asm-i386/pgtable-3level.h b/include/asm-i386/pgtable-3level.h
new file mode 100644 (file)
index 0000000..99d7181
--- /dev/null
@@ -0,0 +1,131 @@
+#ifndef _I386_PGTABLE_3LEVEL_H
+#define _I386_PGTABLE_3LEVEL_H
+
+/*
+ * Intel Physical Address Extension (PAE) Mode - three-level page
+ * tables on PPro+ CPUs.
+ *
+ * Copyright (C) 1999 Ingo Molnar <mingo@redhat.com>
+ */
+
+/*
+ * PGDIR_SHIFT determines what a top-level page table entry can map
+ */
+#define PGDIR_SHIFT    30
+#define PTRS_PER_PGD   4
+
+/*
+ * PMD_SHIFT determines the size of the area a middle-level
+ * page table can map
+ */
+#define PMD_SHIFT      21
+#define PTRS_PER_PMD   512
+
+/*
+ * entries per page directory level
+ */
+#define PTRS_PER_PTE   512
+
+#define pte_ERROR(e) \
+       printk("%s:%d: bad pte %016Lx.\n", __FILE__, __LINE__, pte_val(e))
+#define pmd_ERROR(e) \
+       printk("%s:%d: bad pmd %016Lx.\n", __FILE__, __LINE__, pmd_val(e))
+#define pgd_ERROR(e) \
+       printk("%s:%d: bad pgd %016Lx.\n", __FILE__, __LINE__, pgd_val(e))
+
+/*
+ * Subtle, in PAE mode we cannot have zeroes in the top level
+ * page directory, the CPU enforces this.
+ */
+#define pgd_none(x)    (pgd_val(x) == 1ULL)
+extern inline int pgd_bad(pgd_t pgd)           { return 0; }
+extern inline int pgd_present(pgd_t pgd)       { return !pgd_none(pgd); }
+/*
+ * Pentium-II errata A13: in PAE mode we explicitly have to flush
+ * the TLB via cr3 if the top-level pgd is changed... This was one tough
+ * thing to find out - guess i should first read all the documentation
+ * next time around ;)
+ */
+extern inline void __pgd_clear (pgd_t * pgd)
+{
+       pgd_val(*pgd) = 1; // no zero allowed!
+}
+
+extern inline void pgd_clear (pgd_t * pgd)
+{
+       __pgd_clear(pgd);
+       __flush_tlb();
+}
+
+#define pgd_page(pgd) \
+((unsigned long) __va(pgd_val(pgd) & PAGE_MASK))
+
+/* Find an entry in the second-level page table.. */
+#define pmd_offset(dir, address) ((pmd_t *) pgd_page(*(dir)) + \
+                       __pmd_offset(address))
+
+extern __inline__ pmd_t *get_pmd_slow(void)
+{
+       pmd_t *ret = (pmd_t *)__get_free_page(GFP_KERNEL);
+
+       if (ret)
+               memset(ret, 0, PAGE_SIZE);
+       return ret;
+}
+
+extern __inline__ pmd_t *get_pmd_fast(void)
+{
+       unsigned long *ret;
+
+       if ((ret = pmd_quicklist) != NULL) {
+               pmd_quicklist = (unsigned long *)(*ret);
+               ret[0] = 0;
+               pgtable_cache_size--;
+       } else
+               ret = (unsigned long *)get_pmd_slow();
+       return (pmd_t *)ret;
+}
+
+extern __inline__ void free_pmd_fast(pmd_t *pmd)
+{
+       *(unsigned long *)pmd = (unsigned long) pmd_quicklist;
+       pmd_quicklist = (unsigned long *) pmd;
+       pgtable_cache_size++;
+}
+
+extern __inline__ void free_pmd_slow(pmd_t *pmd)
+{
+       free_page((unsigned long)pmd);
+}
+
+extern inline pmd_t * pmd_alloc(pgd_t *pgd, unsigned long address)
+{
+       if (!pgd)
+               BUG();
+       address = (address >> PMD_SHIFT) & (PTRS_PER_PMD - 1);
+       if (pgd_none(*pgd)) {
+               pmd_t *page = get_pmd_fast();
+
+               if (!page)
+                       page = get_pmd_slow();
+               if (page) {
+                       if (pgd_none(*pgd)) {
+                               pgd_val(*pgd) = 1 + __pa(page);
+                               __flush_tlb();
+                               return page + address;
+                       } else
+                               free_pmd_fast(page);
+               } else
+                       return NULL;
+       }
+       return (pmd_t *)pgd_page(*pgd) + address;
+}
+
+/*
+ * Subtle. offset can overflow 32 bits and that's a feature - we can do
+ * up to 16 TB swap on PAE. (Not that anyone should need that much
+ * swapspace, but who knows?)
+ */
+#define SWP_ENTRY(type,offset) __pte((((type) << 1) | ((offset) << 8ULL)))
+
+#endif /* _I386_PGTABLE_3LEVEL_H */
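
The swap-entry layout shared by both page-table variants (SWP_ENTRY above, SWP_TYPE/SWP_OFFSET in pgtable.h below) keeps bit 0 — the present bit — clear, stores the swap type in bits 1-6 and the offset from bit 8 upward; with 64-bit PAE ptes the offset no longer has to fit in 32 bits, hence the 16TB remark in the comment. A stand-alone sketch of the encoding, with hypothetical helper names:

        #include <stdio.h>

        /* mirrors SWP_ENTRY / SWP_TYPE / SWP_OFFSET above */
        static unsigned long long swp_entry(unsigned type, unsigned long long offset)
        {
                return ((unsigned long long)type << 1) | (offset << 8);
        }

        int main(void)
        {
                unsigned long long e = swp_entry(3, 0x12345);

                /* prints: type 3, offset 0x12345 */
                printf("type %llu, offset 0x%llx\n", (e >> 1) & 0x3f, e >> 8);
                return 0;
        }
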
index aea0cd14b9c4d552875191cdedfe9e622fb94b1e..fdc7b01070092f8fc3b380e4e31b4fd72a08fb54 100644 (file)
@@ -100,44 +100,50 @@ static inline void flush_tlb_range(struct mm_struct * mm, unsigned long start, u
        flush_tlb_mm(mm);
 }
 
-
 #endif
 #endif /* !__ASSEMBLY__ */
 
+#define pgd_quicklist (current_cpu_data.pgd_quick)
+#define pmd_quicklist (current_cpu_data.pmd_quick)
+#define pte_quicklist (current_cpu_data.pte_quick)
+#define pgtable_cache_size (current_cpu_data.pgtable_cache_sz)
+
+/*
+ * The Linux x86 paging architecture is 'compile-time dual-mode', it
+ * implements both the traditional 2-level x86 page tables and the
+ * newer 3-level PAE-mode page tables.
+ */
+#ifndef __ASSEMBLY__
+#if CONFIG_X86_PAE
+# include <asm/pgtable-3level.h>
+#else
+# include <asm/pgtable-2level.h>
+#endif
+#endif
 
-/* Certain architectures need to do special things when PTEs
+/*
+ * Certain architectures need to do special things when PTEs
  * within a page table are directly modified.  Thus, the following
  * hook is made available.
  */
 #define set_pte(pteptr, pteval) ((*(pteptr)) = (pteval))
 
-/* PMD_SHIFT determines the size of the area a second-level page table can map */
-#define PMD_SHIFT      22
+#define __beep() asm("movb $0x3,%al; outb %al,$0x61")
+
 #define PMD_SIZE       (1UL << PMD_SHIFT)
 #define PMD_MASK       (~(PMD_SIZE-1))
-
-/* PGDIR_SHIFT determines what a third-level page table entry can map */
-#define PGDIR_SHIFT    22
 #define PGDIR_SIZE     (1UL << PGDIR_SHIFT)
 #define PGDIR_MASK     (~(PGDIR_SIZE-1))
 
-/*
- * entries per page directory level: the i386 is two-level, so
- * we don't really have any PMD directory physically.
- */
-#define PTRS_PER_PTE   1024
-#define PTRS_PER_PMD   1
-#define PTRS_PER_PGD   1024
 #define USER_PTRS_PER_PGD      (TASK_SIZE/PGDIR_SIZE)
 
-/*
- * pgd entries used up by user/kernel:
- */
-
 #define USER_PGD_PTRS (PAGE_OFFSET >> PGDIR_SHIFT)
 #define KERNEL_PGD_PTRS (PTRS_PER_PGD-USER_PGD_PTRS)
-#define __USER_PGD_PTRS ((__PAGE_OFFSET >> PGDIR_SHIFT) & 0x3ff)
-#define __KERNEL_PGD_PTRS (PTRS_PER_PGD-__USER_PGD_PTRS)
+
+#define TWOLEVEL_PGDIR_SHIFT   22
+#define BOOT_USER_PGD_PTRS (__PAGE_OFFSET >> TWOLEVEL_PGDIR_SHIFT)
+#define BOOT_KERNEL_PGD_PTRS (1024-BOOT_USER_PGD_PTRS)
+
 
 #ifndef __ASSEMBLY__
 /* Just any arbitrary offset to the start of the vmalloc VM area: the
@@ -166,7 +172,7 @@ static inline void flush_tlb_range(struct mm_struct * mm, unsigned long start, u
 #define _PAGE_PCD      0x010
 #define _PAGE_ACCESSED 0x020
 #define _PAGE_DIRTY    0x040
-#define _PAGE_4M       0x080   /* 4 MB page, Pentium+, if present.. */
+#define _PAGE_PSE      0x080   /* 4 MB (or 2MB) page, Pentium+, if present.. */
 #define _PAGE_GLOBAL   0x100   /* Global TLB entry PPro+ */
 
 #define _PAGE_PROTNONE 0x080   /* If not present */
@@ -213,40 +219,24 @@ static inline void flush_tlb_range(struct mm_struct * mm, unsigned long start, u
 
 /* page table for 0-4MB for everybody */
 extern unsigned long pg0[1024];
-/* zero page used for uninitialized stuff */
-extern unsigned long empty_zero_page[1024];
 
 /*
- * BAD_PAGETABLE is used when we need a bogus page-table, while
- * BAD_PAGE is used for a bogus page.
- *
  * ZERO_PAGE is a global shared page that is always zero: used
  * for zero-mapped memory areas etc..
  */
-extern pte_t __bad_page(void);
-extern pte_t * __bad_pagetable(void);
-
-#define BAD_PAGETABLE __bad_pagetable()
-#define BAD_PAGE __bad_page()
-#define ZERO_PAGE(vaddr) ((unsigned long) empty_zero_page)
-
-/* number of bits that fit into a memory pointer */
-#define BITS_PER_PTR                   (8*sizeof(unsigned long))
-
-/* to align the pointer to a pointer address */
-#define PTR_MASK                       (~(sizeof(void*)-1))
-
-/* sizeof(void*)==1<<SIZEOF_PTR_LOG2 */
-/* 64-bit machines, beware!  SRB. */
-#define SIZEOF_PTR_LOG2                        2
+extern unsigned long empty_zero_page[1024];
+#define ZERO_PAGE(vaddr) (mem_map + MAP_NR(empty_zero_page))
 
-/* to find an entry in a page-table */
-#define PAGE_PTR(address) \
-((unsigned long)(address)>>(PAGE_SHIFT-SIZEOF_PTR_LOG2)&PTR_MASK&~PAGE_MASK)
+/*
+ * Handling allocation failures during page table setup.
+ */
+extern void __handle_bad_pmd(pmd_t * pmd);
+extern void __handle_bad_pmd_kernel(pmd_t * pmd);
 
 #define pte_none(x)    (!pte_val(x))
 #define pte_present(x) (pte_val(x) & (_PAGE_PRESENT | _PAGE_PROTNONE))
 #define pte_clear(xp)  do { pte_val(*(xp)) = 0; } while (0)
+#define pte_pagenr(x)  ((unsigned long)((pte_val(x) >> PAGE_SHIFT)))
 
 #define pmd_none(x)    (!pmd_val(x))
 #define        pmd_bad(x)      ((pmd_val(x) & (~PAGE_MASK & ~_PAGE_USER)) != _KERNPG_TABLE)
@@ -254,14 +244,12 @@ extern pte_t * __bad_pagetable(void);
 #define pmd_clear(xp)  do { pmd_val(*(xp)) = 0; } while (0)
 
 /*
- * The "pgd_xxx()" functions here are trivial for a folded two-level
- * setup: the pgd is never bad, and a pmd always exists (as it's folded
- * into the pgd entry)
+ * Permanent address of a page. Obviously must never be
+ * called on a highmem page.
  */
-extern inline int pgd_none(pgd_t pgd)          { return 0; }
-extern inline int pgd_bad(pgd_t pgd)           { return 0; }
-extern inline int pgd_present(pgd_t pgd)       { return 1; }
-extern inline void pgd_clear(pgd_t * pgdp)     { }
+#define page_address(page) ({ if (PageHighMem(page)) BUG(); PAGE_OFFSET + (((page) - mem_map) << PAGE_SHIFT); })
+#define pages_to_mb(x) ((x) >> (20-PAGE_SHIFT))
+#define pte_page(x) (mem_map+pte_pagenr(x))
 
 /*
  * The following only work if pte_present() is true.
@@ -288,8 +276,15 @@ extern inline pte_t pte_mkwrite(pte_t pte) { pte_val(pte) |= _PAGE_RW; return pt
  * Conversion functions: convert a page and protection to a page entry,
  * and a page entry and page directory to the page they refer to.
  */
-#define mk_pte(page, pgprot) \
-({ pte_t __pte; pte_val(__pte) = __pa(page) + pgprot_val(pgprot); __pte; })
+
+extern inline pte_t mk_pte(struct page *page, pgprot_t pgprot)
+{
+       pte_t __pte;
+
+       pte_val(__pte) = (page-mem_map)*(unsigned long long)PAGE_SIZE +
+                               pgprot_val(pgprot);
+       return __pte;
+}
 
 /* This takes a physical page address that is used by the remapping functions */
 #define mk_pte_phys(physpage, pgprot) \
@@ -298,28 +293,29 @@ extern inline pte_t pte_mkwrite(pte_t pte)        { pte_val(pte) |= _PAGE_RW; return pt
 extern inline pte_t pte_modify(pte_t pte, pgprot_t newprot)
 { pte_val(pte) = (pte_val(pte) & _PAGE_CHG_MASK) | pgprot_val(newprot); return pte; }
 
-#define pte_page(pte) \
-((unsigned long) __va(pte_val(pte) & PAGE_MASK))
+#define page_pte_prot(page,prot) mk_pte(page, prot)
+#define page_pte(page) page_pte_prot(page, __pgprot(0))
 
 #define pmd_page(pmd) \
 ((unsigned long) __va(pmd_val(pmd) & PAGE_MASK))
 
-/* to find an entry in a page-table-directory */
-#define pgd_offset(mm, address) \
-((mm)->pgd + ((address) >> PGDIR_SHIFT))
+/* to find an entry in a page-table-directory. */
+#define __pgd_offset(address) \
+               ((address >> PGDIR_SHIFT) & (PTRS_PER_PGD-1))
+
+#define pgd_offset(mm, address) ((mm)->pgd+__pgd_offset(address))
 
 /* to find an entry in a kernel page-table-directory */
 #define pgd_offset_k(address) pgd_offset(&init_mm, address)
 
-/* Find an entry in the second-level page table.. */
-extern inline pmd_t * pmd_offset(pgd_t * dir, unsigned long address)
-{
-       return (pmd_t *) dir;
-}
+#define __pmd_offset(address) \
+               (((address) >> PMD_SHIFT) & (PTRS_PER_PMD-1))
 
-/* Find an entry in the third-level page table.. */ 
-#define pte_offset(pmd, address) \
-((pte_t *) (pmd_page(*pmd) + ((address>>10) & ((PTRS_PER_PTE-1)<<2))))
+/* Find an entry in the third-level page table.. */
+#define __pte_offset(address) \
+               ((address >> PAGE_SHIFT) & (PTRS_PER_PTE - 1))
+#define pte_offset(dir, address) ((pte_t *) pmd_page(*(dir)) + \
+                       __pte_offset(address))
 
 /*
  * Allocate and free page tables. The xxx_kernel() versions are
@@ -327,17 +323,25 @@ extern inline pmd_t * pmd_offset(pgd_t * dir, unsigned long address)
  * if any.
  */
 
-#define pgd_quicklist (current_cpu_data.pgd_quick)
-#define pmd_quicklist ((unsigned long *)0)
-#define pte_quicklist (current_cpu_data.pte_quick)
-#define pgtable_cache_size (current_cpu_data.pgtable_cache_sz)
-
 extern __inline__ pgd_t *get_pgd_slow(void)
 {
        pgd_t *ret = (pgd_t *)__get_free_page(GFP_KERNEL);
 
        if (ret) {
+#if 0
+               /*
+                * On PAE allocating a whole page is overkill - we will
+                * either embed this in mm_struct, or do a SLAB cache.
+                */
+               memcpy(ret, swapper_pg_dir, PTRS_PER_PGD * sizeof(pgd_t));
+#endif
+#if CONFIG_X86_PAE
+               int i;
+               for (i = 0; i < USER_PTRS_PER_PGD; i++)
+                       __pgd_clear(ret + i);
+#else
                memset(ret, 0, USER_PTRS_PER_PGD * sizeof(pgd_t));
+#endif
                memcpy(ret + USER_PTRS_PER_PGD, swapper_pg_dir + USER_PTRS_PER_PGD, (PTRS_PER_PGD - USER_PTRS_PER_PGD) * sizeof(pgd_t));
        }
        return ret;
@@ -395,30 +399,15 @@ extern __inline__ void free_pte_slow(pte_t *pte)
        free_page((unsigned long)pte);
 }
 
-/* We don't use pmd cache, so these are dummy routines */
-extern __inline__ pmd_t *get_pmd_fast(void)
-{
-       return (pmd_t *)0;
-}
-
-extern __inline__ void free_pmd_fast(pmd_t *pmd)
-{
-}
-
-extern __inline__ void free_pmd_slow(pmd_t *pmd)
-{
-}
-
-extern void __bad_pte(pmd_t *pmd);
-extern void __bad_pte_kernel(pmd_t *pmd);
-
 #define pte_free_kernel(pte)    free_pte_slow(pte)
-#define pte_free(pte)           free_pte_slow(pte)
-#define pgd_free(pgd)           free_pgd_slow(pgd)
-#define pgd_alloc()             get_pgd_fast()
+#define pte_free(pte)     free_pte_slow(pte)
+#define pgd_free(pgd)     free_pgd_slow(pgd)
+#define pgd_alloc()         get_pgd_fast()
 
 extern inline pte_t * pte_alloc_kernel(pmd_t * pmd, unsigned long address)
 {
+       if (!pmd)
+               BUG();
        address = (address >> PAGE_SHIFT) & (PTRS_PER_PTE - 1);
        if (pmd_none(*pmd)) {
                pte_t * page = (pte_t *) get_pte_fast();
@@ -429,7 +418,7 @@ extern inline pte_t * pte_alloc_kernel(pmd_t * pmd, unsigned long address)
                return page + address;
        }
        if (pmd_bad(*pmd)) {
-               __bad_pte_kernel(pmd);
+               __handle_bad_pmd_kernel(pmd);
                return NULL;
        }
        return (pte_t *) pmd_page(*pmd) + address;
@@ -437,13 +426,13 @@ extern inline pte_t * pte_alloc_kernel(pmd_t * pmd, unsigned long address)
 
 extern inline pte_t * pte_alloc(pmd_t * pmd, unsigned long address)
 {
-       address = (address >> (PAGE_SHIFT-2)) & 4*(PTRS_PER_PTE - 1);
+       address = (address >> PAGE_SHIFT) & (PTRS_PER_PTE - 1);
 
        if (pmd_none(*pmd))
                goto getnew;
        if (pmd_bad(*pmd))
                goto fix;
-       return (pte_t *) (pmd_page(*pmd) + address);
+       return (pte_t *)pmd_page(*pmd) + address;
 getnew:
 {
        unsigned long page = (unsigned long) get_pte_fast();
@@ -451,25 +440,19 @@ getnew:
        if (!page)
                return get_pte_slow(pmd, address);
        pmd_val(*pmd) = _PAGE_TABLE + __pa(page);
-       return (pte_t *) (page + address);
+       return (pte_t *)page + address;
 }
 fix:
-       __bad_pte(pmd);
+       __handle_bad_pmd(pmd);
        return NULL;
 }
 
 /*
  * allocating and freeing a pmd is trivial: the 1-entry pmd is
  * inside the pgd, so has no extra memory associated with it.
+ * (In the PAE case we free the page.)
  */
-extern inline void pmd_free(pmd_t * pmd)
-{
-}
-
-extern inline pmd_t * pmd_alloc(pgd_t * pgd, unsigned long address)
-{
-       return (pmd_t *) pgd;
-}
+#define pmd_free(pmd)     free_pmd_slow(pmd)
 
 #define pmd_free_kernel                pmd_free
 #define pmd_alloc_kernel       pmd_alloc
@@ -483,7 +466,7 @@ extern inline void set_pgdir(unsigned long address, pgd_t entry)
 #ifdef __SMP__
        int i;
 #endif 
-        
+
        read_lock(&tasklist_lock);
        for_each_task(p) {
                if (!p->mm)
@@ -512,9 +495,8 @@ extern inline void update_mmu_cache(struct vm_area_struct * vma,
 {
 }
 
-#define SWP_TYPE(entry) (((entry) >> 1) & 0x3f)
-#define SWP_OFFSET(entry) ((entry) >> 8)
-#define SWP_ENTRY(type,offset) (((type) << 1) | ((offset) << 8))
+#define SWP_TYPE(entry) (((pte_val(entry)) >> 1) & 0x3f)
+#define SWP_OFFSET(entry) ((pte_val(entry)) >> 8)
 
 #define module_map      vmalloc
 #define module_unmap    vfree
@@ -527,4 +509,4 @@ extern inline void update_mmu_cache(struct vm_area_struct * vma,
 
 #define io_remap_page_range remap_page_range
 
-#endif /* _I386_PAGE_H */
+#endif /* _I386_PGTABLE_H */
index 939ca0b31706d3d092d2ca1f95c13c2d626f36a3..88f06686407de5c2e5db82cbc1731488d6f1b197 100644 (file)
@@ -46,6 +46,7 @@ struct cpuinfo_x86 {
        int     coma_bug;
        unsigned long loops_per_sec;
        unsigned long *pgd_quick;
+       unsigned long *pmd_quick;
        unsigned long *pte_quick;
        unsigned long pgtable_cache_sz;
 };
@@ -106,6 +107,12 @@ extern struct cpuinfo_x86 cpu_data[];
 #define current_cpu_data boot_cpu_data
 #endif
 
+#define cpu_has_pge \
+               (boot_cpu_data.x86_capability & X86_FEATURE_PGE)
+#define cpu_has_pse \
+               (boot_cpu_data.x86_capability & X86_FEATURE_PSE)
+#define cpu_has_pae \
+               (boot_cpu_data.x86_capability & X86_FEATURE_PAE)
 #define cpu_has_tsc \
                (cpu_data[smp_processor_id()].x86_capability & X86_FEATURE_TSC)
 
index 52c27bead13dfb4009f635ccfc49a028d1c35c88..2aa6aec4e4e4d64a12060c092ac126f33e9914e7 100644 (file)
@@ -166,7 +166,8 @@ struct mpc_config_lintsrc
  
 extern int smp_found_config;
 extern void init_smp_config(void);
-extern unsigned long smp_alloc_memory(unsigned long mem_base);
+extern void init_smp_mappings(void);
+extern void smp_alloc_memory(void);
 extern unsigned long cpu_present_map;
 extern unsigned long cpu_online_map;
 extern volatile unsigned long smp_invalidate_needed;
@@ -179,6 +180,7 @@ extern void smp_invalidate_rcv(void);               /* Process an NMI */
 extern void smp_local_timer_interrupt(struct pt_regs * regs);
 extern void (*mtrr_hook) (void);
 extern void setup_APIC_clocks(void);
+extern void zap_low_mappings (void);
 extern volatile int cpu_number_map[NR_CPUS];
 extern volatile int __cpu_logical_map[NR_CPUS];
 extern inline int cpu_logical_map(int cpu)
diff --git a/include/linux/bigmem.h b/include/linux/bigmem.h
deleted file mode 100644 (file)
index 289183b..0000000
+++ /dev/null
@@ -1,48 +0,0 @@
-#ifndef _LINUX_BIGMEM_H
-#define _LINUX_BIGMEM_H
-
-#include <linux/config.h>
-
-#ifdef CONFIG_BIGMEM
-
-#include <asm/bigmem.h>
-
-/* declarations for linux/mm/bigmem.c */
-extern unsigned long bigmem_mapnr;
-extern int nr_free_bigpages;
-
-extern struct page * prepare_bigmem_swapout(struct page *);
-extern struct page * replace_with_bigmem(struct page *);
-
-#else /* CONFIG_BIGMEM */
-
-#define prepare_bigmem_swapout(page) page
-#define replace_with_bigmem(page) page
-#define kmap(kaddr, type) kaddr
-#define kunmap(vaddr, type) do { } while (0)
-#define nr_free_bigpages 0
-
-#endif /* CONFIG_BIGMEM */
-
-/* when CONFIG_BIGMEM is not set these will be plain clear/copy_page */
-extern inline void clear_bigpage(unsigned long kaddr)
-{
-       unsigned long vaddr;
-
-       vaddr = kmap(kaddr, KM_WRITE);
-       clear_page(vaddr);
-       kunmap(vaddr, KM_WRITE);
-}
-
-extern inline void copy_bigpage(unsigned long to, unsigned long from)
-{
-       unsigned long vfrom, vto;
-
-       vfrom = kmap(from, KM_READ);
-       vto = kmap(to, KM_WRITE);
-       copy_page(vto, vfrom);
-       kunmap(vfrom, KM_READ);
-       kunmap(vto, KM_WRITE);
-}
-
-#endif /* _LINUX_BIGMEM_H */
index 4f7fe13f775ea167435f130802b6c1065b217541..31721c101d85a13556661be0c2407c0e34238066 100644 (file)
@@ -18,7 +18,7 @@
  */
 struct linux_binprm{
        char buf[128];
-       unsigned long page[MAX_ARG_PAGES];
+       struct page *page[MAX_ARG_PAGES];
        unsigned long p; /* current top of mem */
        int sh_bang;
        struct dentry * dentry;
diff --git a/include/linux/bootmem.h b/include/linux/bootmem.h
new file mode 100644 (file)
index 0000000..4b18c7c
--- /dev/null
@@ -0,0 +1,24 @@
+#ifndef _LINUX_BOOTMEM_H
+#define _LINUX_BOOTMEM_H
+
+#include <linux/config.h>
+#include <asm/pgtable.h>
+
+/*
+ *  simple boot-time physical memory area allocator.
+ */
+
+extern unsigned long max_low_pfn;
+
+extern unsigned long __init init_bootmem (unsigned long addr, unsigned long memend);
+extern void __init reserve_bootmem (unsigned long addr, unsigned long size);
+extern void __init free_bootmem (unsigned long addr, unsigned long size);
+extern void * __init __alloc_bootmem (unsigned long size, unsigned long align);
+#define alloc_bootmem(x) __alloc_bootmem((x), SMP_CACHE_BYTES)
+#define alloc_bootmem_pages(x) __alloc_bootmem((x), PAGE_SIZE)
+extern unsigned long __init free_all_bootmem (void);
+
+#endif /* _LINUX_BOOTMEM_H */
+
+
+
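
The boot-time allocator declared above exists before the buddy allocator is initialised. A hedged sketch of how an architecture might drive it — the function name is made up, and whether init_bootmem() takes page-frame numbers or byte addresses should be checked against mm/bootmem.c rather than assumed from this header:

        #include <linux/init.h>
        #include <linux/bootmem.h>
        #include <asm/page.h>

        void __init example_bootmem_setup(unsigned long start, unsigned long end)
        {
                void *buf;

                init_bootmem(start, end);          /* hand over the usable range   */
                reserve_bootmem(0, PAGE_SIZE);     /* keep page 0 out of the pool  */
                buf = alloc_bootmem(4096);         /* SMP_CACHE_BYTES aligned      */
                (void)buf;                         /* later: free_all_bootmem()    */
        }
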
index c3dfc01a5c4244ef702d1da30d6f4c97b047c873..c6c7d76d21e129acf69c71dd1eaaefbe2295e8a1 100644 (file)
@@ -323,6 +323,11 @@ struct iattr {
 #include <linux/quota.h>
 #include <linux/mount.h>
 
+/*
+ * oh the beauties of C type declarations.
+ */
+struct page;
+
 struct inode {
        struct list_head        i_hash;
        struct list_head        i_list;
@@ -350,7 +355,7 @@ struct inode {
        wait_queue_head_t       i_wait;
        struct file_lock        *i_flock;
        struct vm_area_struct   *i_mmap;
-       struct page             *i_pages;
+       struct list_head        i_pages;
        spinlock_t              i_shared_lock;
        struct dquot            *i_dquot[MAXQUOTAS];
        struct pipe_inode_info  *i_pipe;
@@ -769,8 +774,6 @@ extern int fs_may_mount(kdev_t);
 extern int try_to_free_buffers(struct page *);
 extern void refile_buffer(struct buffer_head * buf);
 
-extern atomic_t buffermem;
-
 #define BUF_CLEAN      0
 #define BUF_LOCKED     1       /* Buffers scheduled for write */
 #define BUF_DIRTY      2       /* Dirty buffers, not yet scheduled for write */
@@ -874,7 +877,7 @@ typedef struct {
        int error;
 } read_descriptor_t;
 
-typedef int (*read_actor_t)(read_descriptor_t *, const char *, unsigned long);
+typedef int (*read_actor_t)(read_descriptor_t *, struct page *, unsigned long, unsigned long);
 
 
 extern struct dentry * lookup_dentry(const char *, struct dentry *, unsigned int);
diff --git a/include/linux/highmem.h b/include/linux/highmem.h
new file mode 100644 (file)
index 0000000..e0e9e29
--- /dev/null
@@ -0,0 +1,77 @@
+#ifndef _LINUX_HIGHMEM_H
+#define _LINUX_HIGHMEM_H
+
+#include <linux/config.h>
+#include <asm/pgtable.h>
+
+#ifdef CONFIG_HIGHMEM
+
+extern struct page *highmem_start_page;
+
+#include <asm/highmem.h>
+
+/* declarations for linux/mm/highmem.c */
+extern unsigned long highmem_mapnr;
+extern unsigned long nr_free_highpages;
+
+extern struct page * prepare_highmem_swapout(struct page *);
+extern struct page * replace_with_highmem(struct page *);
+
+#else /* CONFIG_HIGHMEM */
+
+#define prepare_highmem_swapout(page) page
+#define replace_with_highmem(page) page
+#define kmap(page, type) page_address(page)
+#define kunmap(vaddr, type) do { } while (0)
+#define nr_free_highpages 0UL
+
+#endif /* CONFIG_HIGHMEM */
+
+/* when CONFIG_HIGHMEM is not set these will be plain clear/copy_page */
+extern inline void clear_highpage(struct page *page)
+{
+       unsigned long kaddr;
+
+       kaddr = kmap(page, KM_WRITE);
+       clear_page((void *)kaddr);
+       kunmap(kaddr, KM_WRITE);
+}
+
+extern inline void memclear_highpage(struct page *page, unsigned int offset, unsigned int size)
+{
+       unsigned long kaddr;
+
+       if (offset + size > PAGE_SIZE)
+               BUG();
+       kaddr = kmap(page, KM_WRITE);
+       memset((void *)(kaddr + offset), 0, size);
+       kunmap(kaddr, KM_WRITE);
+}
+
+/*
+ * Same but also flushes aliased cache contents to RAM.
+ */
+extern inline void memclear_highpage_flush(struct page *page, unsigned int offset, unsigned int size)
+{
+       unsigned long kaddr;
+
+       if (offset + size > PAGE_SIZE)
+               BUG();
+       kaddr = kmap(page, KM_WRITE);
+       memset((void *)(kaddr + offset), 0, size);
+       flush_page_to_ram(kaddr);
+       kunmap(kaddr, KM_WRITE);
+}
+
+extern inline void copy_highpage(struct page *to, struct page *from)
+{
+       unsigned long vfrom, vto;
+
+       vfrom = kmap(from, KM_READ);
+       vto = kmap(to, KM_WRITE);
+       copy_page((void *)vto, (void *)vfrom);
+       kunmap(vfrom, KM_READ);
+       kunmap(vto, KM_WRITE);
+}
+
+#endif /* _LINUX_HIGHMEM_H */
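
A small usage sketch for the helpers above (hypothetical function, not from the patch): after a short read fills only part of a page — say the last page of a file — the remainder is cleared through memclear_highpage_flush() so stale data never becomes visible, whether or not the page lives in high memory:

        #include <linux/mm.h>
        #include <linux/highmem.h>

        static void zero_page_tail(struct page *page, unsigned int valid_bytes)
        {
                if (valid_bytes < PAGE_SIZE)
                        memclear_highpage_flush(page, valid_bytes,
                                                PAGE_SIZE - valid_bytes);
        }
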
index 9418888f2351116b2b8e6f7c6195f6d2eb390a3e..420285faf62f02bd2c93d0f50fa476103b0a52ff 100644 (file)
@@ -41,7 +41,6 @@ struct kiobuf
         * region, there won't necessarily be page structs defined for
         * every address. */
 
-       unsigned long * pagelist;
        struct page **  maplist;
 
        unsigned int    locked : 1;     /* If set, pages has been locked */
index 89bea4477369ee9046dbeb1e196ff4d6b14582d1..d5b204c2c8a8939a43b9f095cc90708d78752813 100644 (file)
@@ -94,9 +94,10 @@ struct sysinfo {
        unsigned long totalswap;        /* Total swap space size */
        unsigned long freeswap;         /* swap space still available */
        unsigned short procs;           /* Number of current processes */
-       unsigned long totalbig;         /* Total big memory size */
-       unsigned long freebig;          /* Available big memory size */
-       char _f[20-2*sizeof(long)];     /* Padding: libc5 uses this.. */
+       unsigned long totalhigh;        /* Total high memory size */
+       unsigned long freehigh;         /* Available high memory size */
+       unsigned int mem_unit;          /* Memory unit size in bytes */
+       char _f[20-2*sizeof(long)-sizeof(int)]; /* Padding: libc5 uses this.. */
 };
 
 #endif
index c6df59665c959ac1d8899ed47a95e87845e6ee07..de8393a2d0f5129b9bc5cdb9d36bb525d8df35b7 100644 (file)
@@ -8,6 +8,7 @@
 
 #include <linux/config.h>
 #include <linux/string.h>
+#include <linux/list.h>
 
 extern unsigned long max_mapnr;
 extern unsigned long num_physpages;
@@ -103,9 +104,8 @@ struct vm_operations_struct {
        void (*protect)(struct vm_area_struct *area, unsigned long, size_t, unsigned int newprot);
        int (*sync)(struct vm_area_struct *area, unsigned long, size_t, unsigned int flags);
        void (*advise)(struct vm_area_struct *area, unsigned long, size_t, unsigned int advise);
-       unsigned long (*nopage)(struct vm_area_struct * area, unsigned long address, int write_access);
-       unsigned long (*wppage)(struct vm_area_struct * area, unsigned long address,
-               unsigned long page);
+       struct page * (*nopage)(struct vm_area_struct * area, unsigned long address, int write_access);
+       struct page * (*wppage)(struct vm_area_struct * area, unsigned long address, struct page * page);
        int (*swapout)(struct vm_area_struct *, struct page *);
 };
 
@@ -119,8 +119,7 @@ struct vm_operations_struct {
  */
 typedef struct page {
        /* these must be first (free area handling) */
-       struct page *next;
-       struct page *prev;
+       struct list_head list;
        struct inode *inode;
        unsigned long offset;
        struct page *next_hash;
@@ -149,11 +148,11 @@ typedef struct page {
 #define PG_uptodate             3
 #define PG_decr_after           5
 #define PG_DMA                  7
-#define PG_Slab                         8
+#define PG_slab                         8
 #define PG_swap_cache           9
 #define PG_skip                        10
 #define PG_swap_entry          11
-#define PG_BIGMEM              12
+#define PG_highmem             12
                                /* bits 21-30 unused */
 #define PG_reserved            31
 
@@ -183,27 +182,32 @@ if (!test_and_clear_bit(PG_locked, &(page)->flags)) { \
 #define PageReferenced(page)   (test_bit(PG_referenced, &(page)->flags))
 #define PageDecrAfter(page)    (test_bit(PG_decr_after, &(page)->flags))
 #define PageDMA(page)          (test_bit(PG_DMA, &(page)->flags))
-#define PageSlab(page)         (test_bit(PG_Slab, &(page)->flags))
+#define PageSlab(page)         (test_bit(PG_slab, &(page)->flags))
 #define PageSwapCache(page)    (test_bit(PG_swap_cache, &(page)->flags))
 #define PageReserved(page)     (test_bit(PG_reserved, &(page)->flags))
 
-#define PageSetSlab(page)      (set_bit(PG_Slab, &(page)->flags))
+#define PageSetSlab(page)      (set_bit(PG_slab, &(page)->flags))
 #define PageSetSwapCache(page) (set_bit(PG_swap_cache, &(page)->flags))
 
 #define PageTestandSetSwapCache(page)  \
                        (test_and_set_bit(PG_swap_cache, &(page)->flags))
 
-#define PageClearSlab(page)    (clear_bit(PG_Slab, &(page)->flags))
+#define PageClearSlab(page)    (clear_bit(PG_slab, &(page)->flags))
 #define PageClearSwapCache(page)(clear_bit(PG_swap_cache, &(page)->flags))
 
 #define PageTestandClearSwapCache(page)        \
                        (test_and_clear_bit(PG_swap_cache, &(page)->flags))
-#ifdef CONFIG_BIGMEM
-#define PageBIGMEM(page)       (test_bit(PG_BIGMEM, &(page)->flags))
+#ifdef CONFIG_HIGHMEM
+#define PageHighMem(page)      (test_bit(PG_highmem, &(page)->flags))
 #else
-#define PageBIGMEM(page) 0 /* needed to optimize away at compile time */
+#define PageHighMem(page) 0 /* needed to optimize away at compile time */
 #endif
 
+#define SetPageReserved(page)  do { set_bit(PG_reserved, &(page)->flags); \
+                                       } while (0)
+#define ClearPageReserved(page)        do { test_and_clear_bit(PG_reserved, &(page)->flags); } while (0)
+
+
 /*
  * Various page->flags bits:
  *
@@ -224,7 +228,7 @@ if (!test_and_clear_bit(PG_locked, &(page)->flags)) { \
  *   (e.g. a private data page of one process).
  *
  * A page may be used for kmalloc() or anyone else who does a
- * get_free_page(). In this case the page->count is at least 1, and
+ * __get_free_page(). In this case the page->count is at least 1, and
  * all other fields are unused but should be 0 or NULL. The
  * management of this page is the responsibility of the one who uses
  * it.
@@ -281,20 +285,27 @@ extern mem_map_t * mem_map;
  * goes to clearing the page. If you want a page without the clearing
  * overhead, just use __get_free_page() directly..
  */
+extern struct page * __get_pages(int gfp_mask, unsigned long order);
 #define __get_free_page(gfp_mask) __get_free_pages((gfp_mask),0)
 #define __get_dma_pages(gfp_mask, order) __get_free_pages((gfp_mask) | GFP_DMA,(order))
 extern unsigned long FASTCALL(__get_free_pages(int gfp_mask, unsigned long gfp_order));
+extern struct page * get_free_highpage(int gfp_mask);
 
-extern inline unsigned long get_free_page(int gfp_mask)
+extern inline unsigned long get_zeroed_page(int gfp_mask)
 {
        unsigned long page;
 
        page = __get_free_page(gfp_mask);
        if (page)
-               clear_page(page);
+               clear_page((void *)page);
        return page;
 }
 
+/*
+ * The old interface name will be removed in 2.5:
+ */
+#define get_free_page get_zeroed_page
+
 /* memory.c & swap.c*/
 
 #define free_page(addr) free_pages((addr),0)
@@ -302,7 +313,7 @@ extern int FASTCALL(free_pages(unsigned long addr, unsigned long order));
 extern int FASTCALL(__free_page(struct page *));
 
 extern void show_free_areas(void);
-extern unsigned long put_dirty_page(struct task_struct * tsk,unsigned long page,
+extern struct page * put_dirty_page(struct task_struct * tsk, struct page *page,
        unsigned long address);
 
 extern void clear_page_tables(struct mm_struct *, unsigned long, int);
@@ -322,12 +333,13 @@ extern int ptrace_writedata(struct task_struct *tsk, char * src, unsigned long d
 extern int pgt_cache_water[2];
 extern int check_pgt_cache(void);
 
-extern unsigned long paging_init(unsigned long start_mem, unsigned long end_mem);
-extern void mem_init(unsigned long start_mem, unsigned long end_mem);
+extern void paging_init(void);
+extern void free_area_init(unsigned long);
+extern void mem_init(void);
 extern void show_mem(void);
 extern void oom(struct task_struct * tsk);
 extern void si_meminfo(struct sysinfo * val);
-extern void swapin_readahead(unsigned long);
+extern void swapin_readahead(pte_t);
 
 /* mmap.c */
 extern void vma_init(void);
@@ -359,18 +371,18 @@ extern void put_cached_page(unsigned long);
 #define __GFP_HIGH     0x08
 #define __GFP_IO       0x10
 #define __GFP_SWAP     0x20
-#ifdef CONFIG_BIGMEM
-#define __GFP_BIGMEM   0x40
+#ifdef CONFIG_HIGHMEM
+#define __GFP_HIGHMEM  0x40
 #else
-#define __GFP_BIGMEM   0x0 /* noop */
+#define __GFP_HIGHMEM  0x0 /* noop */
 #endif
 
 #define __GFP_DMA      0x80
 
 #define GFP_BUFFER     (__GFP_LOW | __GFP_WAIT)
 #define GFP_ATOMIC     (__GFP_HIGH)
-#define GFP_BIGUSER    (__GFP_LOW | __GFP_WAIT | __GFP_IO | __GFP_BIGMEM)
 #define GFP_USER       (__GFP_LOW | __GFP_WAIT | __GFP_IO)
+#define GFP_HIGHUSER   (GFP_USER | __GFP_HIGHMEM)
 #define GFP_KERNEL     (__GFP_MED | __GFP_WAIT | __GFP_IO)
 #define GFP_NFS                (__GFP_HIGH | __GFP_WAIT | __GFP_IO)
 #define GFP_KSWAPD     (__GFP_IO | __GFP_SWAP)
@@ -380,10 +392,10 @@ extern void put_cached_page(unsigned long);
 
 #define GFP_DMA                __GFP_DMA
 
-/* Flag - indicates that the buffer can be taken from big memory which is not
+/* Flag - indicates that the buffer can be taken from high memory which is not
    directly addressable by the kernel */
 
-#define GFP_BIGMEM     __GFP_BIGMEM
+#define GFP_HIGHMEM    __GFP_HIGHMEM
 
 /* vma is the first one with  address < vma->vm_end,
  * and even  address < vma->vm_start. Have to extend vma. */
@@ -422,7 +434,7 @@ static inline struct vm_area_struct * find_vma_intersection(struct mm_struct * m
 
 extern struct vm_area_struct *find_extend_vma(struct task_struct *tsk, unsigned long addr);
 
-#define buffer_under_min()     ((atomic_read(&buffermem) >> PAGE_SHIFT) * 100 < \
+#define buffer_under_min()     (atomic_read(&buffermem_pages) * 100 < \
                                buffer_mem.min_percent * num_physpages)
 #define pgcache_under_min()    (atomic_read(&page_cache_size) * 100 < \
                                page_cache.min_percent * num_physpages)
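
Tying the new GFP flags to the highmem helpers (illustrative only, helper name invented): pages destined purely for user space can now be requested with GFP_HIGHUSER, which adds __GFP_HIGHMEM to GFP_USER, and the kernel side then goes through the kmap-based helpers whenever it touches them:

        #include <linux/mm.h>
        #include <linux/highmem.h>

        static struct page *alloc_zeroed_user_page(void)
        {
                /* __get_pages() and GFP_HIGHUSER are declared/defined above */
                struct page *page = __get_pages(GFP_HIGHUSER, 0);

                if (page)
                        clear_highpage(page);   /* kmaps internally if needed */
                return page;
        }
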
index 0aff25c2908fedbcb8192ba8500eda83202de12e..6410d3d1e88fd670ce6a552976ed5b1ab53df01f 100644 (file)
 
 #include <linux/mm.h>
 #include <linux/fs.h>
+#include <linux/highmem.h>
+#include <linux/list.h>
 
-static inline unsigned long page_address(struct page * page)
+extern inline pte_t get_pagecache_pte(struct page *page)
 {
-       return PAGE_OFFSET + ((page - mem_map) << PAGE_SHIFT);
+       /*
+        * the pagecache is still machineword sized. The rest of the VM
+        * can deal with arbitrary sized ptes.
+        */
+        return __pte(page->offset);
 }
 
 /*
@@ -30,8 +36,8 @@ static inline unsigned long page_address(struct page * page)
 #define PAGE_CACHE_MASK                PAGE_MASK
 #define PAGE_CACHE_ALIGN(addr) (((addr)+PAGE_CACHE_SIZE-1)&PAGE_CACHE_MASK)
 
-#define page_cache_alloc()     __get_free_page(GFP_USER)
-#define page_cache_free(x)     free_page(x)
+#define page_cache_alloc()     __get_pages(GFP_USER, 0)
+#define page_cache_free(x)     __free_page(x)
 #define page_cache_release(x)  __free_page(x)
 
 /*
@@ -54,7 +60,7 @@ extern void page_cache_init(unsigned long);
  * inode pointer and offsets are distributed (ie, we
  * roughly know which bits are "significant")
  */
-static inline unsigned long _page_hashfn(struct inode * inode, unsigned long offset)
+extern inline unsigned long _page_hashfn(struct inode * inode, unsigned long offset)
 {
 #define i (((unsigned long) inode)/(sizeof(struct inode) & ~ (sizeof(struct inode) - 1)))
 #define o (offset >> PAGE_SHIFT)
@@ -82,26 +88,37 @@ extern void __add_page_to_hash_queue(struct page * page, struct page **p);
 extern void add_to_page_cache(struct page * page, struct inode * inode, unsigned long offset);
 extern int add_to_page_cache_unique(struct page * page, struct inode * inode, unsigned long offset, struct page **hash);
 
-static inline void add_page_to_hash_queue(struct page * page, struct inode * inode, unsigned long offset)
+extern inline void add_page_to_hash_queue(struct page * page, struct inode * inode, unsigned long offset)
 {
        __add_page_to_hash_queue(page, page_hash(inode,offset));
 }
 
-static inline void add_page_to_inode_queue(struct inode * inode, struct page * page)
+extern inline void add_page_to_inode_queue(struct inode * inode, struct page * page)
 {
-       struct page **p = &inode->i_pages;
-
-       inode->i_nrpages++;
+       struct list_head *head = &inode->i_pages;
+
+       if (!inode->i_nrpages++) {
+               if (!list_empty(head))
+                       BUG();
+       } else {
+               if (list_empty(head))
+                       BUG();
+       }
+       list_add(&page->list, head);
        page->inode = inode;
-       page->prev = NULL;
-       if ((page->next = *p) != NULL)
-               page->next->prev = page;
-       *p = page;
+}
+
+extern inline void remove_page_from_inode_queue(struct page * page)
+{
+       struct inode * inode = page->inode;
+
+       inode->i_nrpages--;
+       list_del(&page->list);
 }
 
 extern void ___wait_on_page(struct page *);
 
-static inline void wait_on_page(struct page * page)
+extern inline void wait_on_page(struct page * page)
 {
        if (PageLocked(page))
                ___wait_on_page(page);
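
An inode's cached pages now hang off inode->i_pages as a struct list_head instead of a hand-rolled singly linked chain, so walks over the page list use the generic list_entry() idiom. A representative loop, modelled on the ones added to mm/filemap.c further down (truncate_inode_pages(), invalidate_inode_pages(), both run under the pagecache_lock spinlock):

        struct list_head *head = &inode->i_pages;
        struct list_head *curr = head->next;

        while (curr != head) {
                struct page *page = list_entry(curr, struct page, list);

                curr = curr->next;      /* advance before the page can go away */
                /* ... inspect page->offset, page->buffers, ... */
        }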
index dd5dcf2c8b4cf934b2f0e85dad34ce5ab34ee919..81ec83c273bbf87aca6a9738feb4d95ce74282bf 100644 (file)
@@ -426,7 +426,7 @@ struct task_struct {
 /* files */    &init_files, \
 /* mm */       NULL, &init_mm, \
 /* signals */  SPIN_LOCK_UNLOCKED, &init_signals, {{0}}, {{0}}, NULL, &init_task.sigqueue, 0, 0, \
-/* exec cts */ 0,0,0, \
+/* exec cts */ 0,0, \
 }
 
 #ifndef INIT_TASK_SIZE
index d837873123d14b59ddc148fd5ca292428d7cdc4a..6ba237e923da33d761f1e5d1298101864b0c79b0 100644 (file)
@@ -24,7 +24,7 @@ struct shmid_kernel
        struct shmid_ds         u;
        /* the following are private */
        unsigned long           shm_npages;     /* size of segment (pages) */
-       unsigned long           *shm_pages;     /* array of ptrs to frames -> SHMMAX */ 
+       pte_t                   *shm_pages;     /* array of ptrs to frames -> SHMMAX */ 
        struct vm_area_struct   *attaches;      /* descriptors for attaches */
 };
 
@@ -72,7 +72,7 @@ asmlinkage long sys_shmget (key_t key, int size, int flag);
 asmlinkage long sys_shmat (int shmid, char *shmaddr, int shmflg, unsigned long *addr);
 asmlinkage long sys_shmdt (char *shmaddr);
 asmlinkage long sys_shmctl (int shmid, int cmd, struct shmid_ds *buf);
-extern void shm_unuse(unsigned long entry, unsigned long page);
+extern void shm_unuse(pte_t entry, struct page *page);
 
 #endif /* __KERNEL__ */
 
index 3097a8db26f2e26ce33ebc2af2e994cbaa74cc5b..fa344d816a1738f4b154ab16dc4a8cd5024a618a 100644 (file)
@@ -45,7 +45,7 @@ typedef struct kmem_cache_s kmem_cache_t;
 #define        SLAB_CTOR_VERIFY        0x004UL         /* tell constructor it's a verify call */
 
 /* prototypes */
-extern long kmem_cache_init(long, long);
+extern void kmem_cache_init(void);
 extern void kmem_cache_sizes_init(void);
 extern kmem_cache_t *kmem_find_general_cachep(size_t);
 extern kmem_cache_t *kmem_cache_create(const char *, size_t, size_t, unsigned long,
index 0b0baf1e8feb0f6f4e19a54d44da39aa88f09032..7030b788d395c7a2d0da8ec9a4d50bf834325d7b 100644 (file)
@@ -35,8 +35,6 @@ union swap_header {
 #define MAX_SWAP_BADPAGES \
        ((__swapoffset(magic.magic) - __swapoffset(info.badpages)) / sizeof(int))
 
-#undef DEBUG_SWAP
-
 #include <asm/atomic.h>
 
 #define SWP_USED       1
@@ -69,7 +67,7 @@ extern struct list_head lru_cache;
 extern atomic_t nr_async_pages;
 extern struct inode swapper_inode;
 extern atomic_t page_cache_size;
-extern atomic_t buffermem;
+extern atomic_t buffermem_pages;
 
 /* Incomplete types for prototype declarations: */
 struct task_struct;
@@ -87,36 +85,35 @@ extern int try_to_free_pages(unsigned int gfp_mask);
 
 /* linux/mm/page_io.c */
 extern void rw_swap_page(int, struct page *, int);
-extern void rw_swap_page_nolock(int, unsigned long, char *, int);
-extern void swap_after_unlock_page (unsigned long entry);
+extern void rw_swap_page_nolock(int, pte_t, char *, int);
 
 /* linux/mm/page_alloc.c */
 
 /* linux/mm/swap_state.c */
 extern void show_swap_cache_info(void);
-extern void add_to_swap_cache(struct page *, unsigned long);
-extern int swap_duplicate(unsigned long);
+extern void add_to_swap_cache(struct page *, pte_t);
+extern int swap_duplicate(pte_t);
 extern int swap_check_entry(unsigned long);
-struct page * lookup_swap_cache(unsigned long);
-extern struct page * read_swap_cache_async(unsigned long, int);
+struct page * lookup_swap_cache(pte_t);
+extern struct page * read_swap_cache_async(pte_t, int);
 #define read_swap_cache(entry) read_swap_cache_async(entry, 1);
-extern int FASTCALL(swap_count(unsigned long));
-extern unsigned long acquire_swap_entry(struct page *page);
+extern int swap_count(struct page *);
+extern pte_t acquire_swap_entry(struct page *page);
 
 /*
  * Make these inline later once they are working properly.
  */
 extern void __delete_from_swap_cache(struct page *page);
 extern void delete_from_swap_cache(struct page *page);
-extern void free_page_and_swap_cache(unsigned long addr);
+extern void free_page_and_swap_cache(struct page *page);
 
 /* linux/mm/swapfile.c */
 extern unsigned int nr_swapfiles;
 extern struct swap_info_struct swap_info[];
 extern int is_swap_partition(kdev_t);
 void si_swapinfo(struct sysinfo *);
-unsigned long get_swap_page(void);
-extern void FASTCALL(swap_free(unsigned long));
+pte_t get_swap_page(void);
+extern void swap_free(pte_t);
 struct swap_list_t {
        int head;       /* head of priority-ordered swapfile list */
        int next;       /* swapfile to be used next */
@@ -158,7 +155,7 @@ static inline int is_page_shared(struct page *page)
                return 1;
        count = page_count(page);
        if (PageSwapCache(page))
-               count += swap_count(page->offset) - 2;
+               count += swap_count(page) - 2;
        return  count > 1;
 }
 
index 8d6db96e54199e289c391e2881eb4b4a8b237482..b228797f7b17e3addc517b025c68b28224abbc59 100644 (file)
@@ -339,12 +339,13 @@ extern int fg_console, last_console, want_console;
 
 extern int kmsg_redirect;
 
-extern unsigned long con_init(unsigned long);
+extern void con_init(void);
+extern void console_init(void);
 
 extern int rs_init(void);
 extern int lp_init(void);
 extern int pty_init(void);
-extern int tty_init(void);
+extern void tty_init(void);
 extern int ip2_init(void);
 extern int pcxe_init(void);
 extern int pc_init(void);
@@ -393,7 +394,7 @@ extern int n_tty_ioctl(struct tty_struct * tty, struct file * file,
 
 /* serial.c */
 
-extern long serial_console_init(long kmem_start, long kmem_end);
+extern void serial_console_init(void);
  
 /* pcxx.c */
 
index f0d17e56c5348bbeff4baa04cf366d77809c2f89..c81acaeaa8ea6fb4271ae18e41dae6560cdb62ce 100644 (file)
@@ -24,6 +24,7 @@
 #include <linux/blk.h>
 #include <linux/hdreg.h>
 #include <linux/iobuf.h>
+#include <linux/bootmem.h>
 
 #include <asm/io.h>
 #include <asm/bugs.h>
@@ -79,7 +80,6 @@ static int init(void *);
 
 extern void init_IRQ(void);
 extern void init_modules(void);
-extern long console_init(long, long);
 extern void sock_init(void);
 extern void fork_init(unsigned long);
 extern void mca_init(void);
@@ -110,9 +110,6 @@ extern void dquot_init_hash(void);
 
 extern void time_init(void);
 
-static unsigned long memory_start = 0;
-static unsigned long memory_end = 0;
-
 int rows, cols;
 
 #ifdef CONFIG_BLK_DEV_INITRD
@@ -423,7 +420,7 @@ static void __init parse_options(char *line)
 }
 
 
-extern void setup_arch(char **, unsigned long *, unsigned long *);
+extern void setup_arch(char **);
 extern void cpu_idle(void);
 
 #ifndef __SMP__
@@ -450,15 +447,15 @@ static void __init smp_init(void)
 asmlinkage void __init start_kernel(void)
 {
        char * command_line;
-
+       unsigned long mempages;
 /*
  * Interrupts are still disabled. Do necessary setups, then
  * enable them
  */
        lock_kernel();
        printk(linux_banner);
-       setup_arch(&command_line, &memory_start, &memory_end);
-       memory_start = paging_init(memory_start,memory_end);
+       setup_arch(&command_line);
+       paging_init();
        trap_init();
        init_IRQ();
        sched_init();
@@ -470,40 +467,45 @@ asmlinkage void __init start_kernel(void)
         * we've done PCI setups etc, and console_init() must be aware of
         * this. But we do want output early, in case something goes wrong.
         */
-       memory_start = console_init(memory_start,memory_end);
+       console_init();
 #ifdef CONFIG_MODULES
        init_modules();
 #endif
        if (prof_shift) {
-               prof_buffer = (unsigned int *) memory_start;
+               unsigned int size;
                /* only text is profiled */
                prof_len = (unsigned long) &_etext - (unsigned long) &_stext;
                prof_len >>= prof_shift;
-               memory_start += prof_len * sizeof(unsigned int);
-               memset(prof_buffer, 0, prof_len * sizeof(unsigned int));
+               
+               size = prof_len * sizeof(unsigned int) + PAGE_SIZE-1;
+               prof_buffer = (unsigned int *) alloc_bootmem(size);
+               memset(prof_buffer, 0, size);
        }
 
-       memory_start = kmem_cache_init(memory_start, memory_end);
+       kmem_cache_init();
        sti();
        calibrate_delay();
 #ifdef CONFIG_BLK_DEV_INITRD
+       // FIXME, use the bootmem.h interface.
        if (initrd_start && !initrd_below_start_ok && initrd_start < memory_start) {
                printk(KERN_CRIT "initrd overwritten (0x%08lx < 0x%08lx) - "
                    "disabling it.\n",initrd_start,memory_start);
                initrd_start = 0;
        }
 #endif
-       mem_init(memory_start,memory_end);
+       mem_init();
        kmem_cache_sizes_init();
 #ifdef CONFIG_PROC_FS
        proc_root_init();
 #endif
-       fork_init(memory_end-memory_start);
+       mempages = num_physpages;
+
+       fork_init(mempages);
        filescache_init();
        dcache_init();
        vma_init();
-       buffer_init(memory_end-memory_start);
-       page_cache_init(memory_end-memory_start);
+       buffer_init(mempages);
+       page_cache_init(mempages);
        kiobuf_init();
        signals_init();
        inode_init();
index 71a2b4eb779c854e2e64eda9eaecc558ea21a25a..653634ca81e8bfcd300cc30f42b9bc2bfc64cc47 100644 (file)
--- a/ipc/shm.c
+++ b/ipc/shm.c
@@ -1,13 +1,14 @@
 /*
  * linux/ipc/shm.c
  * Copyright (C) 1992, 1993 Krishna Balasubramanian
- *         Many improvements/fixes by Bruno Haible.
+ *      Many improvements/fixes by Bruno Haible.
  * Replaced `struct shm_desc' by `struct vm_area_struct', July 1994.
  * Fixed the shm swap deallocation (shm_unuse()), August 1998 Andrea Arcangeli.
  *
  * /proc/sysvipc/shm support (c) 1999 Dragos Acostachioaie <dragos@iname.com>
  * BIGMEM support, Andrea Arcangeli <andrea@suse.de>
  * SMP thread shm, Jean-Luc Boyard <jean-luc.boyard@siemens.fr>
+ * HIGHMEM support, Ingo Molnar <mingo@redhat.com>
  */
 
 #include <linux/config.h>
 #include <linux/vmalloc.h>
 #include <linux/pagemap.h>
 #include <linux/proc_fs.h>
-#include <linux/bigmem.h>
+#include <linux/highmem.h>
 
 #include <asm/uaccess.h>
 #include <asm/pgtable.h>
 
 extern int ipcperms (struct ipc_perm *ipcp, short shmflg);
-extern unsigned long get_swap_page (void);
 static int findkey (key_t key);
 static int newseg (key_t key, int shmflg, int size);
 static int shm_map (struct vm_area_struct *shmd);
 static void killseg (int id);
 static void shm_open (struct vm_area_struct *shmd);
 static void shm_close (struct vm_area_struct *shmd);
-static unsigned long shm_nopage(struct vm_area_struct *, unsigned long, int);
+static struct page * shm_nopage(struct vm_area_struct *, unsigned long, int);
 static int shm_swapout(struct vm_area_struct *, struct page *);
 #ifdef CONFIG_PROC_FS
 static int sysvipc_shm_read_proc(char *buffer, char **start, off_t offset, int length, int *eof, void *data);
@@ -108,7 +108,7 @@ static int newseg (key_t key, int shmflg, int size)
 {
        struct shmid_kernel *shp;
        int numpages = (size + PAGE_SIZE -1) >> PAGE_SHIFT;
-       int id, i;
+       int id;
 
        if (size < SHMMIN)
                return -EINVAL;
@@ -131,7 +131,7 @@ found:
                return -ENOMEM;
        }
        lock_kernel();
-       shp->shm_pages = (ulong *) vmalloc (numpages*sizeof(ulong));
+       shp->shm_pages = (pte_t *) vmalloc (numpages*sizeof(pte_t));
        unlock_kernel();
        if (!shp->shm_pages) {
                kfree(shp);
@@ -141,7 +141,8 @@ found:
                return -ENOMEM;
        }
 
-       for (i = 0; i < numpages; shp->shm_pages[i++] = 0);
+       memset(shp->shm_pages, 0, numpages*sizeof(pte_t));
+
        shp->u.shm_perm.key = key;
        shp->u.shm_perm.mode = (shmflg & S_IRWXUGO);
        shp->u.shm_perm.cuid = shp->u.shm_perm.uid = current->euid;
@@ -214,33 +215,29 @@ static void killseg (int id)
        int rss, swp;
 
        shp = shm_segs[id];
-       if (shp == IPC_NOID || shp == IPC_UNUSED) {
-               printk ("shm nono: killseg called on unused seg id=%d\n", id);
-               return;
-       }
+       if (shp == IPC_NOID || shp == IPC_UNUSED)
+               BUG();
        shp->u.shm_perm.seq++;     /* for shmat */
        shm_seq = (shm_seq+1) % ((unsigned)(1<<31)/SHMMNI); /* increment, but avoid overflow */
        shm_segs[id] = (struct shmid_kernel *) IPC_UNUSED;
        used_segs--;
        if (id == max_shmid)
                while (max_shmid && (shm_segs[--max_shmid] == IPC_UNUSED));
-       if (!shp->shm_pages) {
-               printk ("shm nono: killseg shp->pages=NULL. id=%d\n", id);
-               return;
-       }
+       if (!shp->shm_pages)
+               BUG();
        spin_unlock(&shm_lock);
        numpages = shp->shm_npages;
        for (i = 0, rss = 0, swp = 0; i < numpages ; i++) {
                pte_t pte;
-               pte = __pte(shp->shm_pages[i]);
+               pte = shp->shm_pages[i];
                if (pte_none(pte))
                        continue;
                if (pte_present(pte)) {
-                       free_page (pte_page(pte));
+                       __free_page (pte_page(pte));
                        rss++;
                } else {
                        lock_kernel();
-                       swap_free(pte_val(pte));
+                       swap_free(pte);
                        unlock_kernel();
                        swp++;
                }
@@ -484,16 +481,12 @@ asmlinkage long sys_shmat (int shmid, char *shmaddr, int shmflg, ulong *raddr)
 
        down(&current->mm->mmap_sem);
        spin_lock(&shm_lock);
-       if (shmid < 0) {
-               /* printk("shmat() -> EINVAL because shmid = %d < 0\n",shmid); */
+       if (shmid < 0)
                goto out;
-       }
 
        shp = shm_segs[id = (unsigned int) shmid % SHMMNI];
-       if (shp == IPC_UNUSED || shp == IPC_NOID) {
-               /* printk("shmat() -> EINVAL because shmid = %d is invalid\n",shmid); */
+       if (shp == IPC_UNUSED || shp == IPC_NOID)
                goto out;
-       }
 
        if (!(addr = (ulong) shmaddr)) {
                if (shmflg & SHM_REMAP)
@@ -526,16 +519,9 @@ asmlinkage long sys_shmat (int shmid, char *shmaddr, int shmflg, ulong *raddr)
         */
        if (addr < current->mm->start_stack &&
            addr > current->mm->start_stack - PAGE_SIZE*(shp->shm_npages + 4))
-       {
-               /* printk("shmat() -> EINVAL because segment intersects stack\n"); */
                goto out;
-       }
-       if (!(shmflg & SHM_REMAP))
-               if ((shmd = find_vma_intersection(current->mm, addr, addr + shp->u.shm_segsz))) {
-                       /* printk("shmat() -> EINVAL because the interval [0x%lx,0x%lx) intersects an already mapped interval [0x%lx,0x%lx).\n",
-                               addr, addr + shp->shm_segsz, shmd->vm_start, shmd->vm_end); */
-                       goto out;
-               }
+       if (!(shmflg & SHM_REMAP) && find_vma_intersection(current->mm, addr, addr + shp->u.shm_segsz))
+               goto out;
 
        err = -EACCES;
        if (ipcperms(&shp->u.shm_perm, shmflg & SHM_RDONLY ? S_IRUGO : S_IRUGO|S_IWUGO))
@@ -568,7 +554,7 @@ asmlinkage long sys_shmat (int shmid, char *shmaddr, int shmflg, ulong *raddr)
        shmd->vm_offset = 0;
        shmd->vm_ops = &shm_vm_ops;
 
-       shp->u.shm_nattch++;            /* prevent destruction */
+       shp->u.shm_nattch++;        /* prevent destruction */
        spin_unlock(&shm_lock);
        err = shm_map (shmd);
        spin_lock(&shm_lock);
@@ -668,86 +654,76 @@ static int shm_swapout(struct vm_area_struct * vma, struct page * page)
 /*
  * page not present ... go through shm_pages
  */
-static unsigned long shm_nopage(struct vm_area_struct * shmd, unsigned long address, int no_share)
+static struct page * shm_nopage(struct vm_area_struct * shmd, unsigned long address, int no_share)
 {
        pte_t pte;
        struct shmid_kernel *shp;
        unsigned int idx;
-       unsigned long page;
-       struct page * page_map;
+       struct page * page;
 
        shp = *(struct shmid_kernel **) shmd->vm_private_data;
        idx = (address - shmd->vm_start + shmd->vm_offset) >> PAGE_SHIFT;
 
-#ifdef DEBUG_SHM
-       if (shp == IPC_UNUSED || shp == IPC_NOID) {
-               printk ("shm_nopage: id=%d invalid. Race.\n", id);
-               return 0;
-       }
-       if (idx >= shp->shm_npages) {
-               printk ("shm_nopage : too large page index. id=%d\n", id);
-               return 0;
-       }
-#endif
-
        spin_lock(&shm_lock);
- again:
-       pte = __pte(shp->shm_pages[idx]);
+again:
+       pte = shp->shm_pages[idx];
        if (!pte_present(pte)) {
                if (pte_none(pte)) {
                        spin_unlock(&shm_lock);
-                       page = __get_free_page(GFP_BIGUSER);
+                       page = get_free_highpage(GFP_HIGHUSER);
                        if (!page)
                                goto oom;
-                       clear_bigpage(page);
+                       clear_highpage(page);
                        spin_lock(&shm_lock);
-                       if (pte_val(pte) != shp->shm_pages[idx])
+                       if (pte_val(pte) != pte_val(shp->shm_pages[idx]))
                                goto changed;
                } else {
-                       unsigned long entry = pte_val(pte);
+                       pte_t entry = pte;
 
                        spin_unlock(&shm_lock);
-                       page_map = lookup_swap_cache(entry);
-                       if (!page_map) {
+                       BUG();
+                       page = lookup_swap_cache(entry);
+                       if (!page) {
                                lock_kernel();
                                swapin_readahead(entry);
-                               page_map = read_swap_cache(entry);
+                               page = read_swap_cache(entry);
                                unlock_kernel();
-                               if (!page_map)
+                               if (!page)
                                        goto oom;
                        }
-                       delete_from_swap_cache(page_map);
-                       page_map = replace_with_bigmem(page_map);
-                       page = page_address(page_map);
+                       delete_from_swap_cache(page);
+                       page = replace_with_highmem(page);
                        lock_kernel();
                        swap_free(entry);
                        unlock_kernel();
                        spin_lock(&shm_lock);
                        shm_swp--;
-                       pte = __pte(shp->shm_pages[idx]);
+                       pte = shp->shm_pages[idx];
                        if (pte_present(pte))
                                goto present;
                }
                shm_rss++;
                pte = pte_mkdirty(mk_pte(page, PAGE_SHARED));
-               shp->shm_pages[idx] = pte_val(pte);
+               shp->shm_pages[idx] = pte;
        } else
                --current->maj_flt;  /* was incremented in do_no_page */
 
-done:  /* pte_val(pte) == shp->shm_pages[idx] */
-       get_page(mem_map + MAP_NR(pte_page(pte)));
+done:
+       /* pte_val(pte) == shp->shm_pages[idx] */
+       get_page(pte_page(pte));
        spin_unlock(&shm_lock);
        current->min_flt++;
        return pte_page(pte);
 
 changed:
-       free_page(page);
+       __free_page(page);
        goto again;
 present:
-       free_page(page);
+       if (page)
+               free_page_and_swap_cache(page);
        goto done;
 oom:
-       return -1;
+       return (struct page *)(-1);
 }
 
 /*
@@ -760,7 +736,7 @@ int shm_swap (int prio, int gfp_mask)
 {
        pte_t page;
        struct shmid_kernel *shp;
-       unsigned long swap_nr;
+       pte_t swap_entry;
        unsigned long id, idx;
        int loop = 0;
        int counter;
@@ -768,7 +744,7 @@ int shm_swap (int prio, int gfp_mask)
        
        counter = shm_rss >> prio;
        lock_kernel();
-       if (!counter || !(swap_nr = get_swap_page())) {
+       if (!counter || !pte_val(swap_entry = get_swap_page())) {
                unlock_kernel();
                return 0;
        }
@@ -795,36 +771,37 @@ int shm_swap (int prio, int gfp_mask)
        if (idx >= shp->shm_npages)
                goto next_id;
 
-       page = __pte(shp->shm_pages[idx]);
+       page = shp->shm_pages[idx];
        if (!pte_present(page))
                goto check_table;
-       page_map = &mem_map[MAP_NR(pte_page(page))];
+       page_map = pte_page(page);
        if ((gfp_mask & __GFP_DMA) && !PageDMA(page_map))
                goto check_table;
-       if (!(gfp_mask & __GFP_BIGMEM) && PageBIGMEM(page_map))
+       if (!(gfp_mask & __GFP_HIGHMEM) && PageHighMem(page_map))
                goto check_table;
        swap_attempts++;
 
        if (--counter < 0) { /* failed */
- failed:
+failed:
                spin_unlock(&shm_lock);
                lock_kernel();
-               swap_free (swap_nr);
+               swap_free(swap_entry);
                unlock_kernel();
                return 0;
        }
-       if (page_count(mem_map + MAP_NR(pte_page(page))) != 1)
+       if (page_count(page_map))
                goto check_table;
-       if (!(page_map = prepare_bigmem_swapout(page_map)))
+       if (!(page_map = prepare_highmem_swapout(page_map)))
                goto check_table;
-       shp->shm_pages[idx] = swap_nr;
+       shp->shm_pages[idx] = swap_entry;
        swap_successes++;
        shm_swp++;
        shm_rss--;
        spin_unlock(&shm_lock);
+
        lock_kernel();
-       swap_duplicate(swap_nr);
-       add_to_swap_cache(page_map, swap_nr);
+       swap_duplicate(swap_entry);
+       add_to_swap_cache(page_map, swap_entry);
        rw_swap_page(WRITE, page_map, 0);
        unlock_kernel();
 
@@ -836,13 +813,13 @@ int shm_swap (int prio, int gfp_mask)
  * Free the swap entry and set the new pte for the shm page.
  */
 static void shm_unuse_page(struct shmid_kernel *shp, unsigned long idx,
-                          unsigned long page, unsigned long entry)
+                       pte_t entry, struct page *page)
 {
        pte_t pte;
 
        pte = pte_mkdirty(mk_pte(page, PAGE_SHARED));
-       shp->shm_pages[idx] = pte_val(pte);
-       get_page(mem_map + MAP_NR(page));
+       shp->shm_pages[idx] = pte;
+       get_page(page);
        shm_rss++;
 
        shm_swp--;
@@ -856,20 +833,21 @@ static void shm_unuse_page(struct shmid_kernel *shp, unsigned long idx,
 /*
  * unuse_shm() search for an eventually swapped out shm page.
  */
-void shm_unuse(unsigned long entry, unsigned long page)
+void shm_unuse(pte_t entry, struct page *page)
 {
        int i, n;
 
        spin_lock(&shm_lock);
-       for (i = 0; i < SHMMNI; i++)
-               if (shm_segs[i] != IPC_UNUSED && shm_segs[i] != IPC_NOID)
-                       for (n = 0; n < shm_segs[i]->shm_npages; n++)
-                               if (shm_segs[i]->shm_pages[n] == entry)
-                               {
-                                       shm_unuse_page(shm_segs[i], n,
-                                                      page, entry);
-                                       return;
-                               }
+       for (i = 0; i < SHMMNI; i++) {
+               struct shmid_kernel *seg = shm_segs[i];
+               if ((seg == IPC_UNUSED) || (seg == IPC_NOID))
+                       continue;
+               for (n = 0; n < seg->shm_npages; n++)
+                       if (pte_val(seg->shm_pages[n]) == pte_val(entry)) {
+                               shm_unuse_page(seg, n, entry, page);
+                               return;
+                       }
+       }
        spin_unlock(&shm_lock);
 }
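
The ->nopage change visible in shm_nopage() above is the new convention for all fault handlers in this commit: the method hands back the struct page itself rather than a physical address, with failure signalled as NULL or (struct page *)(-1) (the distinction is still rough, as the XXX comment in mm/filemap.c further down notes). A stripped-down handler under the new convention might look like this (illustrative only; the return values are the ones shm_nopage() above and filemap_nopage() below use):

        static struct page *example_nopage(struct vm_area_struct *vma,
                                           unsigned long address, int no_share)
        {
                struct page *page = get_free_highpage(GFP_HIGHUSER);

                if (!page)
                        return (struct page *)(-1);     /* out of memory, as shm_nopage() reports it */
                clear_highpage(page);                   /* fresh pages must be zeroed */
                return page;                            /* the caller maps the struct page */
        }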
 
index 37794d726ee3c8a06fbd99aa27c5db0b94e1528e..9f3d9f5077afe39d5e7dfb69c73cb5c690f6b5d4 100644 (file)
@@ -157,7 +157,7 @@ int alloc_uid(struct task_struct *p)
        return 0;
 }
 
-void __init fork_init(unsigned long memsize)
+void __init fork_init(unsigned long mempages)
 {
        int i;
 
@@ -175,7 +175,7 @@ void __init fork_init(unsigned long memsize)
         * value: the thread structures can take up at most half
         * of memory.
         */
-       max_threads = memsize / THREAD_SIZE / 2;
+       max_threads = mempages / (THREAD_SIZE/PAGE_SIZE) / 2;
 
        init_task.rlim[RLIMIT_NPROC].rlim_cur = max_threads/2;
        init_task.rlim[RLIMIT_NPROC].rlim_max = max_threads/2;
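
fork_init() is now handed a page count instead of a byte count, and the resulting limit is unchanged. As a worked example (assuming 4 KB pages and the i386 THREAD_SIZE of 8 KB, so THREAD_SIZE/PAGE_SIZE == 2): a 128 MB machine has mempages = 32768, giving max_threads = 32768 / 2 / 2 = 8192, i.e. 8192 * 8 KB = 64 MB, half of memory as the ceiling for thread structures, exactly what the old memsize / THREAD_SIZE / 2 computation produced.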
index 330ce3efebc494e2664ce70c66f9661c3e90fb6d..843bcaeb87dbc435132446dd140194a3ae5b131a 100644 (file)
@@ -22,7 +22,7 @@
 
 #include <asm/uaccess.h>
 
-#define LOG_BUF_LEN    (16384)
+#define LOG_BUF_LEN    (16384*16)
 #define LOG_BUF_MASK   (LOG_BUF_LEN-1)
 
 static char buf[1024];
index 4327f9d1e1a5f35538b81d75809fb4a557d067b7..6d14da625447dc991480399a8a5651b028e7389f 100644 (file)
@@ -10,7 +10,7 @@
 #include <linux/sched.h>
 #include <linux/errno.h>
 #include <linux/mm.h>
-#include <linux/bigmem.h>
+#include <linux/highmem.h>
 
 #include <asm/pgtable.h>
 #include <asm/uaccess.h>
@@ -23,7 +23,9 @@ static int access_one_page(struct task_struct * tsk, struct vm_area_struct * vma
        pgd_t * pgdir;
        pmd_t * pgmiddle;
        pte_t * pgtable;
-       unsigned long page;
+       unsigned long mapnr;
+       unsigned long maddr; 
+       struct page *page;
 
 repeat:
        pgdir = pgd_offset(vma->vm_mm, addr);
@@ -39,27 +41,25 @@ repeat:
        pgtable = pte_offset(pgmiddle, addr);
        if (!pte_present(*pgtable))
                goto fault_in_page;
-       page = pte_page(*pgtable);
+       mapnr = pte_pagenr(*pgtable);
        if (write && (!pte_write(*pgtable) || !pte_dirty(*pgtable)))
                goto fault_in_page;
-       if (MAP_NR(page) >= max_mapnr)
+       if (mapnr >= max_mapnr)
                return 0;
+       page = mem_map + mapnr;
        flush_cache_page(vma, addr);
-       {
-               void *src = (void *) (page + (addr & ~PAGE_MASK));
-               void *dst = buf;
 
-               if (write) {
-                       dst = src;
-                       src = buf;
-               }
-               src = (void *) kmap((unsigned long) src, KM_READ);
-               dst = (void *) kmap((unsigned long) dst, KM_WRITE);
-               memcpy(dst, src, len);
-               kunmap((unsigned long) src, KM_READ);
-               kunmap((unsigned long) dst, KM_WRITE);
+       if (write) {
+               maddr = kmap(page, KM_WRITE);
+               memcpy((char *)maddr + (addr & ~PAGE_MASK), buf, len);
+               flush_page_to_ram(maddr);
+               kunmap(maddr, KM_WRITE);
+       } else {
+               maddr = kmap(page, KM_READ);
+               memcpy(buf, (char *)maddr + (addr & ~PAGE_MASK), len);
+               flush_page_to_ram(maddr);
+               kunmap(maddr, KM_READ);
        }
-       flush_page_to_ram(page);
        return len;
 
 fault_in_page:
@@ -69,11 +69,11 @@ fault_in_page:
        return 0;
 
 bad_pgd:
-       printk("ptrace: bad pgd in '%s' at %08lx (%08lx)\n", tsk->comm, addr, pgd_val(*pgdir));
+       pgd_ERROR(*pgdir);
        return 0;
 
 bad_pmd:
-       printk("ptrace: bad pmd in '%s' at %08lx (%08lx)\n", tsk->comm, addr, pmd_val(*pgmiddle));
+       pmd_ERROR(*pgmiddle);
        return 0;
 }
 
index 68404aa6794c108103a1d5bcb268565d7e9846f2..31c1a62316447430899fd0565ce38219147b5cee 100644 (file)
@@ -9,11 +9,11 @@
 
 O_TARGET := mm.o
 O_OBJS  := memory.o mmap.o filemap.o mprotect.o mlock.o mremap.o \
-           vmalloc.o slab.o \
-           swap.o vmscan.o page_io.o page_alloc.o swap_state.o swapfile.o
+           vmalloc.o slab.o bootmem.o swap.o vmscan.o page_io.o \
+           page_alloc.o swap_state.o swapfile.o
 
-ifeq ($(CONFIG_BIGMEM),y)
-O_OBJS += bigmem.o
+ifeq ($(CONFIG_HIGHMEM),y)
+O_OBJS += highmem.o
 endif
 
 include $(TOPDIR)/Rules.make
diff --git a/mm/bigmem.c b/mm/bigmem.c
deleted file mode 100644 (file)
index af63e86..0000000
+++ /dev/null
@@ -1,71 +0,0 @@
-/*
- * BIGMEM common code and variables.
- *
- * (C) 1999 Andrea Arcangeli, SuSE GmbH, andrea@suse.de
- *          Gerhard Wichert, Siemens AG, Gerhard.Wichert@pdb.siemens.de
- */
-
-#include <linux/mm.h>
-#include <linux/pagemap.h>
-#include <linux/bigmem.h>
-
-unsigned long bigmem_mapnr;
-int nr_free_bigpages = 0;
-
-struct page * prepare_bigmem_swapout(struct page * page)
-{
-       /* if this is a bigmem page so it can't be swapped out directly
-          otherwise the b_data buffer addresses will break
-          the lowlevel device drivers. */
-       if (PageBIGMEM(page)) {
-               unsigned long regular_page;
-               unsigned long vaddr;
-
-               regular_page = __get_free_page(GFP_ATOMIC);
-               if (!regular_page)
-                       return NULL;
-
-               vaddr = kmap(page_address(page), KM_READ);
-               copy_page(regular_page, vaddr);
-               kunmap(vaddr, KM_READ);
-
-               /* ok, we can just forget about our bigmem page since 
-                  we stored its data into the new regular_page. */
-               __free_page(page);
-
-               page = MAP_NR(regular_page) + mem_map;
-       }
-       return page;
-}
-
-struct page * replace_with_bigmem(struct page * page)
-{
-       if (!PageBIGMEM(page) && nr_free_bigpages) {
-               unsigned long kaddr;
-
-               kaddr = __get_free_page(GFP_ATOMIC|GFP_BIGMEM);
-               if (kaddr) {
-                       struct page * bigmem_page;
-
-                       bigmem_page = MAP_NR(kaddr) + mem_map;
-                       if (PageBIGMEM(bigmem_page)) {
-                               unsigned long vaddr;
-
-                               vaddr = kmap(kaddr, KM_WRITE);
-                               copy_page(vaddr, page_address(page));
-                               kunmap(vaddr, KM_WRITE);
-
-                               /* Preserve the caching of the swap_entry. */
-                               bigmem_page->offset = page->offset;
-
-                               /* We can just forget the old page since 
-                                  we stored its data into the new
-                                  bigmem_page. */
-                               __free_page(page);
-
-                               page = bigmem_page;
-                       }
-               }
-       }
-       return page;
-}
diff --git a/mm/bootmem.c b/mm/bootmem.c
new file mode 100644 (file)
index 0000000..07dadb9
--- /dev/null
@@ -0,0 +1,217 @@
+/*
+ *  linux/mm/bootmem.c
+ *
+ *  Copyright (C) 1999 Ingo Molnar
+ *
+ *  simple boot-time physical memory area allocator and
+ *  free memory collector. It's used to deal with reserved
+ *  system memory and memory holes as well.
+ */
+
+#include <linux/config.h>
+#include <linux/mm.h>
+#include <linux/kernel_stat.h>
+#include <linux/swap.h>
+#include <linux/swapctl.h>
+#include <linux/interrupt.h>
+#include <linux/init.h>
+#include <linux/bootmem.h>
+
+/*
+ * Pointer to a bitmap - the bits represent all physical memory pages
+ * from physical address 0 to physical address end_mem.
+ *
+ * Access to this subsystem has to be serialized externally. (this is
+ * true for the boot process anyway)
+ */
+static void * bootmem_map = NULL;
+unsigned long max_low_pfn;
+
+/*
+ * Called once to set up the allocator itself.
+ */
+unsigned long __init init_bootmem (unsigned long start, unsigned long pages)
+{
+       unsigned long mapsize = (pages+7)/8;
+
+       if (bootmem_map)
+               BUG();
+       bootmem_map = __va(start << PAGE_SHIFT);
+       max_low_pfn = pages;
+
+       /*
+        * Initially all pages are reserved - setup_arch() has to
+        * register free RAM areas explicitly.
+        */
+       memset(bootmem_map, 0xff, mapsize);
+
+       return mapsize;
+}
+
+/*
+ * Marks a particular physical memory range as reserved. Reserved RAM
+ * will not be handed out by boot-time allocations, nor will it get
+ * added to the free page pool later on.
+ */
+void __init reserve_bootmem (unsigned long addr, unsigned long size)
+{
+       unsigned long i;
+       /*
+        * round up, partially reserved pages are considered
+        * fully reserved.
+        */
+       unsigned long end = (addr + size + PAGE_SIZE-1)/PAGE_SIZE;
+
+       if (!bootmem_map) BUG();
+       if (!size) BUG();
+
+       if (end > max_low_pfn)
+               BUG();
+       for (i = addr/PAGE_SIZE; i < end; i++)
+               if (test_and_set_bit(i, bootmem_map))
+                       BUG();
+}
+
+void __init free_bootmem (unsigned long addr, unsigned long size)
+{
+       unsigned long i;
+       /*
+        * round down end of usable mem, partially free pages are
+        * considered reserved.
+        */
+       unsigned long end = (addr + size)/PAGE_SIZE;
+
+       if (!bootmem_map) BUG();
+       if (!size) BUG();
+
+       if (end > max_low_pfn)
+               BUG();
+       for (i = addr/PAGE_SIZE; i < end; i++) {
+               if (!test_and_clear_bit(i, bootmem_map))
+                       BUG();
+       }
+}
+
+/*
+ * We 'merge' subsequent allocations to save space. We might 'lose'
+ * some fraction of a page if allocations cannot be satisfied due to
+ * size constraints on boxes where there is physical RAM space
+ * fragmentation - in these cases * (mostly large memory boxes) this
+ * is not a problem.
+ *
+ * On low memory boxes we get it right in 100% of the cases.
+ */
+static unsigned long last_pos = 0;
+static unsigned long last_offset = 0;
+
+/*
+ * alignment has to be a power of 2 value.
+ */
+void * __init __alloc_bootmem (unsigned long size, unsigned long align)
+{
+       int area = 0;
+       unsigned long i, start = 0, reserved;
+       void *ret;
+       unsigned long offset, remaining_size;
+       unsigned long areasize;
+
+       if (!bootmem_map) BUG();
+       if (!size) BUG();
+
+       areasize = (size+PAGE_SIZE-1)/PAGE_SIZE;
+
+       for (i = 0; i < max_low_pfn; i++) {
+               reserved = test_bit(i, bootmem_map);
+               if (!reserved) {
+                       if (!area) {
+                               area = 1;
+                               start = i;
+                       }
+                       if (i - start + 1 == areasize)
+                               goto found;
+               } else {
+                       area = 0;
+                       start = -1;
+               }
+       }
+       BUG();
+found:
+       if (start >= max_low_pfn)
+               BUG();
+
+       /*
+        * Is the next page of the previous allocation-end the start
+        * of this allocation's buffer? If yes then we can 'merge'
+        * the previous partial page with this allocation.
+        */
+       if (last_offset && (last_pos+1 == start)) {
+               offset = (last_offset+align-1) & ~(align-1);
+               if (offset > PAGE_SIZE)
+                       BUG();
+               remaining_size = PAGE_SIZE-offset;
+               if (remaining_size > PAGE_SIZE)
+                       BUG();
+               if (size < remaining_size) {
+                       areasize = 0;
+                       // last_pos unchanged
+                       last_offset = offset+size;
+                       ret = __va(last_pos*PAGE_SIZE + offset);
+               } else {
+                       size -= remaining_size;
+                       areasize = (size+PAGE_SIZE-1)/PAGE_SIZE;
+                       ret = __va(last_pos*PAGE_SIZE + offset);
+                       last_pos = start+areasize-1;
+                       last_offset = size;
+               }
+               last_offset &= ~PAGE_MASK;
+       } else {
+               last_pos = start + areasize - 1;
+               last_offset = size & ~PAGE_MASK;
+               ret = __va(start * PAGE_SIZE);
+       }
+       /*
+        * Reserve the area now:
+        */
+       for (i = start; i < start+areasize; i++)
+               if (test_and_set_bit(i, bootmem_map))
+                       BUG();
+
+       return ret;
+}
+
+unsigned long __init free_all_bootmem (void)
+{
+       struct page * page;
+       unsigned long i, count, total = 0;
+
+       if (!bootmem_map) BUG();
+
+       printk("freeing all bootmem().\n");
+       page = mem_map;
+       count = 0;
+       for (i = 0; i < max_low_pfn; i++, page++) {
+               if (!test_bit(i, bootmem_map)) {
+                       count++;
+                       ClearPageReserved(page);
+                       set_page_count(page, 1);
+                       __free_page(page);
+               }
+       }
+       total += count;
+       /*
+        * Now free the allocator bitmap itself, it's not
+        * needed anymore:
+        */
+       page = mem_map + MAP_NR(bootmem_map);
+       count = 0;
+       for (i = 0; i < (max_low_pfn/8 + PAGE_SIZE-1)/PAGE_SIZE; i++,page++) {
+               count++;
+               ClearPageReserved(page);
+               set_page_count(page, 1);
+               __free_page(page);
+       }
+       total += count;
+       bootmem_map = NULL;
+
+       return total;
+}
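
Pieced together from the routines above and their callers elsewhere in this commit, the allocator's life cycle looks roughly like this (an indicative sketch only; the *_start/*_size variables are placeholders for whatever a given setup_arch() computes):

        /* setup_arch(): create the bitmap, with every page initially reserved */
        bitmap_size = init_bootmem(bootmap_start_pfn, max_low_pfn);

        /* register the RAM that is actually usable, then re-reserve known-used
         * ranges (kernel image, boot data, the bootmem bitmap itself, ...) */
        free_bootmem(ram_start, ram_size);
        reserve_bootmem(reserved_start, reserved_size);

        /* boot-time allocations, e.g. the profiling buffer in init/main.c above */
        buf = alloc_bootmem(size);

        /* mem_init(): hand everything still free over to the page allocator */
        freed = free_all_bootmem();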
index 51624abcfc51a573a9b96700adc12ed89b0c0faf..6fdd065a7fad1467ad8428bab80ecacc3de5d40f 100644 (file)
@@ -21,6 +21,7 @@
 #include <linux/swapctl.h>
 #include <linux/slab.h>
 #include <linux/init.h>
+#include <linux/highmem.h>
 
 #include <asm/pgtable.h>
 #include <asm/uaccess.h>
@@ -75,24 +76,6 @@ static void remove_page_from_hash_queue(struct page * page)
        atomic_dec(&page_cache_size);
 }
 
-static void remove_page_from_inode_queue(struct page * page)
-{
-       struct inode * inode = page->inode;
-       struct page *prev, *next;
-
-       inode->i_nrpages--;
-       next = page->next;
-       prev = page->prev;
-       if (inode->i_pages == page)
-               inode->i_pages = next;
-       if (next)
-               next->prev = prev;
-       if (prev)
-               prev->next = next;
-       page->next = NULL;
-       page->prev = NULL;
-}
-
 /*
  * Remove a page from the page cache and free it. Caller has to make
  * sure the page is locked and that nobody else uses it - or that usage
@@ -112,13 +95,17 @@ void remove_inode_page(struct page *page)
 
 void invalidate_inode_pages(struct inode * inode)
 {
-       struct page ** p;
+       struct list_head *head, *curr;
        struct page * page;
 
+       head = &inode->i_pages;
 repeat:
        spin_lock(&pagecache_lock);
-       p = &inode->i_pages;
-       while ((page = *p) != NULL) {
+       curr = head->next;
+
+       while (curr != head) {
+               page = list_entry(curr, struct page, list);
+               curr = curr->next;
                get_page(page);
                if (TryLockPage(page)) {
                        spin_unlock(&pagecache_lock);
@@ -136,7 +123,6 @@ repeat:
                UnlockPage(page);
                page_cache_release(page);
                page_cache_release(page);
-
        }
        spin_unlock(&pagecache_lock);
 }
@@ -146,15 +132,21 @@ repeat:
  */
 void truncate_inode_pages(struct inode * inode, unsigned long start)
 {
-       struct page ** p;
+       struct list_head *head, *curr;
+       unsigned long offset;
        struct page * page;
        int partial = 0;
 
 repeat:
+       head = &inode->i_pages;
        spin_lock(&pagecache_lock);
-       p = &inode->i_pages;
-       while ((page = *p) != NULL) {
-               unsigned long offset = page->offset;
+       curr = head->next;
+       while (curr != head) {
+
+               page = list_entry(curr, struct page, list);
+               curr = curr->next;
+
+               offset = page->offset;
 
                /* page wholly truncated - free it */
                if (offset >= start) {
@@ -190,7 +182,6 @@ repeat:
                         */
                        goto repeat;
                }
-               p = &page->next;
                /*
                 * there is only one partial page possible.
                 */
@@ -200,17 +191,14 @@ repeat:
                offset = start - offset;
                /* partial truncate, clear end of page */
                if (offset < PAGE_CACHE_SIZE) {
-                       unsigned long address;
                        get_page(page);
                        spin_unlock(&pagecache_lock);
 
                        lock_page(page);
                        partial = 1;
 
-                       address = page_address(page);
-                       memset((void *) (offset + address), 0, PAGE_CACHE_SIZE - offset);
-                       flush_page_to_ram(address);
-
+                       memclear_highpage_flush(page, offset,
+                                               PAGE_CACHE_SIZE-offset);
                        if (inode->i_op->flushpage)
                                inode->i_op->flushpage(inode, page, offset);
                        /*
@@ -255,7 +243,7 @@ int shrink_mmap(int priority, int gfp_mask)
                /* don't account passes over not DMA pages */
                if ((gfp_mask & __GFP_DMA) && !PageDMA(page))
                        goto dispose_continue;
-               if (!(gfp_mask & __GFP_BIGMEM) && PageBIGMEM(page))
+               if (!(gfp_mask & __GFP_HIGHMEM) && PageHighMem(page))
                        goto dispose_continue;
 
                count--;
@@ -291,7 +279,7 @@ int shrink_mmap(int priority, int gfp_mask)
                                goto unlock_continue;
                        /* page was locked, inode can't go away under us */
                        if (!page->inode) {
-                               atomic_sub(PAGE_CACHE_SIZE, &buffermem);
+                               atomic_dec(&buffermem_pages);
                                goto made_buffer_progress;
                        }
                        spin_lock(&pagecache_lock);
@@ -431,16 +419,18 @@ static int waitfor_one_page(struct page *page)
 
 static int do_buffer_fdatasync(struct inode *inode, unsigned long start, unsigned long end, int (*fn)(struct page *))
 {
-       struct page *next;
+       struct list_head *head, *curr;
+       struct page *page;
        int retval = 0;
 
+       head = &inode->i_pages;
        start &= PAGE_MASK;
 
        spin_lock(&pagecache_lock);
-       next = inode->i_pages;
-       while (next) {
-               struct page *page = next;
-               next = page->next;
+       curr = head->next;
+       while (curr != head) {
+               page = list_entry(curr, struct page, list);
+               curr = curr->next;
                if (!page->buffers)
                        continue;
                if (page->offset >= end)
@@ -458,7 +448,7 @@ static int do_buffer_fdatasync(struct inode *inode, unsigned long start, unsigne
 
                UnlockPage(page);
                spin_lock(&pagecache_lock);
-               next = page->next;
+               curr = page->list.next;
                page_cache_release(page);
        }
        spin_unlock(&pagecache_lock);
@@ -487,6 +477,7 @@ static inline void __add_to_page_cache(struct page * page,
        struct inode * inode, unsigned long offset,
        struct page **hash)
 {
+       struct page *alias;
        unsigned long flags;
 
        flags = page->flags & ~((1 << PG_uptodate) | (1 << PG_error) | (1 << PG_referenced));
@@ -497,6 +488,9 @@ static inline void __add_to_page_cache(struct page * page,
        add_page_to_inode_queue(inode, page);
        __add_page_to_hash_queue(page, hash);
        lru_cache_add(page);
+       alias = __find_page_nolock(inode, offset, *hash);
+       if (alias != page)
+               BUG();
 }
 
 void add_to_page_cache(struct page * page, struct inode * inode, unsigned long offset)
@@ -532,10 +526,9 @@ int add_to_page_cache_unique(struct page * page,
  */
 static inline void page_cache_read(struct file * file, unsigned long offset) 
 {
-       unsigned long new_page;
        struct inode *inode = file->f_dentry->d_inode;
-       struct page ** hash = page_hash(inode, offset);
-       struct page * page; 
+       struct page **hash = page_hash(inode, offset);
+       struct page *page; 
 
        spin_lock(&pagecache_lock);
        page = __find_page_nolock(inode, offset, *hash); 
@@ -543,22 +536,20 @@ static inline void page_cache_read(struct file * file, unsigned long offset)
        if (page)
                return;
 
-       new_page = page_cache_alloc();
-       if (!new_page)
+       page = page_cache_alloc();
+       if (!page)
                return;
-       page = page_cache_entry(new_page);
 
        if (!add_to_page_cache_unique(page, inode, offset, hash)) {
                inode->i_op->readpage(file, page);
                page_cache_release(page);
                return;
        }
-
        /*
         * We arrive here in the unlikely event that someone 
         * raced with us and added our page to the cache first.
         */
-       page_cache_free(new_page);
+       page_cache_free(page);
        return;
 }
 
@@ -962,13 +953,13 @@ void do_generic_file_read(struct file * filp, loff_t *ppos, read_descriptor_t *
 {
        struct dentry *dentry = filp->f_dentry;
        struct inode *inode = dentry->d_inode;
-       size_t pos, pgpos, page_cache;
+       size_t pos, pgpos;
+       struct page *cached_page;
        int reada_ok;
        int error;
        int max_readahead = get_max_readahead(inode);
 
-       page_cache = 0;
-
+       cached_page = NULL;
        pos = *ppos;
        pgpos = pos & PAGE_CACHE_MASK;
 /*
@@ -1051,7 +1042,7 @@ page_ok:
                 * "pos" here (the actor routine has to update the user buffer
                 * pointers and the remaining count).
                 */
-               nr = actor(desc, (const char *) (page_address(page) + offset), nr);
+               nr = actor(desc, page, offset, nr);
                pos += nr;
                page_cache_release(page);
                if (nr && desc->count)
@@ -1105,10 +1096,10 @@ no_cached_page:
                 *
                 * We get here with the page cache lock held.
                 */
-               if (!page_cache) {
+               if (!cached_page) {
                        spin_unlock(&pagecache_lock);
-                       page_cache = page_cache_alloc();
-                       if (!page_cache) {
+                       cached_page = page_cache_alloc();
+                       if (!cached_page) {
                                desc->error = -ENOMEM;
                                break;
                        }
@@ -1126,29 +1117,35 @@ no_cached_page:
                /*
                 * Ok, add the new page to the hash-queues...
                 */
-               page = page_cache_entry(page_cache);
+               page = cached_page;
                __add_to_page_cache(page, inode, pos & PAGE_CACHE_MASK, hash);
                spin_unlock(&pagecache_lock);
+               cached_page = NULL;
 
-               page_cache = 0;
                goto readpage;
        }
 
        *ppos = pos;
        filp->f_reada = 1;
-       if (page_cache)
-               page_cache_free(page_cache);
+       if (cached_page)
+               page_cache_free(cached_page);
        UPDATE_ATIME(inode);
 }
 
-static int file_read_actor(read_descriptor_t * desc, const char *area, unsigned long size)
+static int file_read_actor(read_descriptor_t * desc, struct page *page, unsigned long offset, unsigned long size)
 {
-       unsigned long left;
-       unsigned long count = desc->count;
+       unsigned long kaddr;
+       unsigned long left, count = desc->count;
 
        if (size > count)
                size = count;
-       left = __copy_to_user(desc->buf, area, size);
+       /*
+        * FIXME: We cannot yet sleep with kmaps held.
+        */
+       kaddr = kmap(page, KM_READ);
+       left = __copy_to_user(desc->buf, (void *)(kaddr+offset), size);
+       kunmap(kaddr, KM_READ);
+       
        if (left) {
                size -= left;
                desc->error = -EFAULT;
@@ -1187,8 +1184,9 @@ ssize_t generic_file_read(struct file * filp, char * buf, size_t count, loff_t *
        return retval;
 }
 
-static int file_send_actor(read_descriptor_t * desc, const char *area, unsigned long size)
+static int file_send_actor(read_descriptor_t * desc, struct page *page, unsigned long offset , unsigned long size)
 {
+       unsigned long kaddr;
        ssize_t written;
        unsigned long count = desc->count;
        struct file *file = (struct file *) desc->buf;
@@ -1198,7 +1196,9 @@ static int file_send_actor(read_descriptor_t * desc, const char *area, unsigned
                size = count;
        old_fs = get_fs();
        set_fs(KERNEL_DS);
-       written = file->f_op->write(file, area, size, &file->f_pos);
+       kaddr = kmap(page, KM_READ);
+       written = file->f_op->write(file, (char *)kaddr + offset, size, &file->f_pos);
+       kunmap(kaddr, KM_READ);
        set_fs(old_fs);
        if (written < 0) {
                desc->error = written;
@@ -1298,14 +1298,13 @@ out:
  * XXX - at some point, this should return unique values to indicate to
  *       the caller whether this is EIO, OOM, or SIGBUS.
  */
-static unsigned long filemap_nopage(struct vm_area_struct * area,
+static struct page * filemap_nopage(struct vm_area_struct * area,
        unsigned long address, int no_share)
 {
-       struct file * file = area->vm_file;
-       struct dentry * dentry = file->f_dentry;
-       struct inode * inode = dentry->d_inode;
-       struct page * page, **hash;
-       unsigned long old_page;
+       struct file *file = area->vm_file;
+       struct dentry *dentry = file->f_dentry;
+       struct inode *inode = dentry->d_inode;
+       struct page *page, **hash, *old_page;
 
        unsigned long offset = address - area->vm_start + area->vm_offset;
 
@@ -1317,7 +1316,7 @@ static unsigned long filemap_nopage(struct vm_area_struct * area,
         */
        if ((offset >= inode->i_size) &&
                (area->vm_flags & VM_SHARED) && (area->vm_mm == current->mm))
-               return 0;
+               return NULL;
 
        /*
         * Do we have something in the page cache already?
@@ -1340,12 +1339,14 @@ success:
         * Found the page and have a reference on it, need to check sharing
         * and possibly copy it over to another page..
         */
-       old_page = page_address(page);
+       old_page = page;
        if (no_share) {
-               unsigned long new_page = page_cache_alloc();
+               struct page *new_page = page_cache_alloc();
 
                if (new_page) {
-                       copy_page(new_page, old_page);
+                       if (PageHighMem(new_page) || PageHighMem(old_page))
+                               BUG();
+                       copy_highpage(new_page, old_page);
                        flush_page_to_ram(new_page);
                }
                page_cache_release(page);
@@ -1411,7 +1412,7 @@ page_not_uptodate:
         * mm layer so, possibly freeing the page cache page first.
         */
        page_cache_release(page);
-       return 0;
+       return NULL;
 }
 
 /*
@@ -1419,12 +1420,11 @@ page_not_uptodate:
  * if the disk is full.
  */
 static inline int do_write_page(struct inode * inode, struct file * file,
-       const char * page_addr, unsigned long offset)
+       struct page * page, unsigned long offset)
 {
        int retval;
        unsigned long size;
        int (*writepage) (struct file *, struct page *);
-       struct page * page;
 
        size = offset + PAGE_SIZE;
        /* refuse to extend file size.. */
@@ -1438,7 +1438,6 @@ static inline int do_write_page(struct inode * inode, struct file * file,
        size -= offset;
        retval = -EIO;
        writepage = inode->i_op->writepage;
-       page = mem_map + MAP_NR(page_addr);
        lock_page(page);
 
        retval = writepage(file, page);
@@ -1449,7 +1448,7 @@ static inline int do_write_page(struct inode * inode, struct file * file,
 
 static int filemap_write_page(struct vm_area_struct * vma,
                              unsigned long offset,
-                             unsigned long page,
+                             struct page * page,
                              int wait)
 {
        int result;
@@ -1466,7 +1465,7 @@ static int filemap_write_page(struct vm_area_struct * vma,
         * and file could be released ... increment the count to be safe.
         */
        get_file(file);
-       result = do_write_page(inode, file, (const char *) page, offset);
+       result = do_write_page(inode, file, page, offset);
        fput(file);
        return result;
 }
@@ -1480,7 +1479,7 @@ static int filemap_write_page(struct vm_area_struct * vma,
 extern void wakeup_bdflush(int);
 int filemap_swapout(struct vm_area_struct * vma, struct page * page)
 {
-       int retval = filemap_write_page(vma, page->offset, page_address(page), 0);
+       int retval = filemap_write_page(vma, page->offset, page, 0);
        wakeup_bdflush(0);
        return retval;
 }
@@ -1489,7 +1488,6 @@ static inline int filemap_sync_pte(pte_t * ptep, struct vm_area_struct *vma,
        unsigned long address, unsigned int flags)
 {
        pte_t pte = *ptep;
-       unsigned long pageaddr;
        struct page *page;
        int error;
 
@@ -1502,8 +1500,7 @@ static inline int filemap_sync_pte(pte_t * ptep, struct vm_area_struct *vma,
                flush_cache_page(vma, address);
                set_pte(ptep, pte_mkclean(pte));
                flush_tlb_page(vma, address);
-               pageaddr = pte_page(pte);
-               page = page_cache_entry(pageaddr);
+               page = pte_page(pte);
                get_page(page);
        } else {
                if (pte_none(pte))
@@ -1512,17 +1509,19 @@ static inline int filemap_sync_pte(pte_t * ptep, struct vm_area_struct *vma,
                pte_clear(ptep);
                flush_tlb_page(vma, address);
                if (!pte_present(pte)) {
-                       swap_free(pte_val(pte));
+                       swap_free(pte);
                        return 0;
                }
-               pageaddr = pte_page(pte);
+               page = pte_page(pte);
                if (!pte_dirty(pte) || flags == MS_INVALIDATE) {
-                       page_cache_free(pageaddr);
+                       page_cache_free(page);
                        return 0;
                }
        }
-       error = filemap_write_page(vma, address - vma->vm_start + vma->vm_offset, pageaddr, 1);
-       page_cache_free(pageaddr);
+       if (PageHighMem(page))
+               BUG();
+       error = filemap_write_page(vma, address - vma->vm_start + vma->vm_offset, page, 1);
+       page_cache_free(page);
        return error;
 }
 
@@ -1537,7 +1536,7 @@ static inline int filemap_sync_pte_range(pmd_t * pmd,
        if (pmd_none(*pmd))
                return 0;
        if (pmd_bad(*pmd)) {
-               printk("filemap_sync_pte_range: bad pmd (%08lx)\n", pmd_val(*pmd));
+               pmd_ERROR(*pmd);
                pmd_clear(pmd);
                return 0;
        }
@@ -1552,7 +1551,7 @@ static inline int filemap_sync_pte_range(pmd_t * pmd,
                error |= filemap_sync_pte(pte, vma, address + offset, flags);
                address += PAGE_SIZE;
                pte++;
-       } while (address < end);
+       } while (address && (address < end));
        return error;
 }
 
@@ -1567,7 +1566,7 @@ static inline int filemap_sync_pmd_range(pgd_t * pgd,
        if (pgd_none(*pgd))
                return 0;
        if (pgd_bad(*pgd)) {
-               printk("filemap_sync_pmd_range: bad pgd (%08lx)\n", pgd_val(*pgd));
+               pgd_ERROR(*pgd);
                pgd_clear(pgd);
                return 0;
        }
@@ -1582,7 +1581,7 @@ static inline int filemap_sync_pmd_range(pgd_t * pgd,
                error |= filemap_sync_pte_range(pmd, address, end - address, vma, offset, flags);
                address = (address + PMD_SIZE) & PMD_MASK;
                pmd++;
-       } while (address < end);
+       } while (address && (address < end));
        return error;
 }
 
@@ -1595,11 +1594,13 @@ static int filemap_sync(struct vm_area_struct * vma, unsigned long address,
 
        dir = pgd_offset(vma->vm_mm, address);
        flush_cache_range(vma->vm_mm, end - size, end);
-       while (address < end) {
+       if (address >= end)
+               BUG();
+       do {
                error |= filemap_sync_pmd_range(dir, address, end - address, vma, flags);
                address = (address + PGDIR_SIZE) & PGDIR_MASK;
                dir++;
-       }
+       } while (address && (address < end));
        flush_tlb_range(vma->vm_mm, end - size, end);
        return error;
 }
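
The loops in filemap_sync() and its helpers above, like the analogous walkers in mm/memory.c and mm/mprotect.c further down, change from "while (address < end)" to a do/while that also tests address for zero: the step "address = (address + PGDIR_SIZE) & PGDIR_MASK" wraps to 0 once the last top-level slot of the address space has been handled, and with the old test a range ending near 4 GB would loop forever. A minimal userspace sketch of the failure and the fix; the 4 MB slot size is the usual two-level i386 value, assumed here purely for illustration:

    /* Why the walkers now test "address != 0" as well: the aligned
     * increment wraps to zero after the last 4 MB slot. */
    #include <stdio.h>
    #include <stdint.h>

    #define PGDIR_SHIFT 22                          /* assumed: 4 MB slots */
    #define PGDIR_SIZE  ((uint32_t)1 << PGDIR_SHIFT)
    #define PGDIR_MASK  (~(PGDIR_SIZE - 1))

    int main(void)
    {
            uint32_t address = 0xffc00000;          /* last slot below 4 GB */
            uint32_t end     = 0xfffff000;

            do {
                    printf("slot %08x\n", (unsigned)address);
                    address = (address + PGDIR_SIZE) & PGDIR_MASK;
            } while (address && (address < end));   /* wrap to 0 ends the loop */

            printf("finished, address wrapped to %u\n", (unsigned)address);
            return 0;
    }

With only "address < end" as the condition, the wrapped value 0 would compare below end and the walk would start over from the bottom of the address space indefinitely.
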
@@ -1775,12 +1776,13 @@ generic_file_write(struct file *file, const char *buf,
        struct inode    *inode = dentry->d_inode; 
        unsigned long   pos = *ppos;
        unsigned long   limit = current->rlim[RLIMIT_FSIZE].rlim_cur;
-       struct page     *page, **hash;
-       unsigned long   page_cache = 0;
+       struct page     *page, **hash, *cached_page;
        unsigned long   written;
        long            status;
        int             err;
 
+       cached_page = NULL;
+
        down(&inode->i_sem);
        err = file->f_error;
        if (err) {
@@ -1828,18 +1830,18 @@ generic_file_write(struct file *file, const char *buf,
 repeat_find:
                page = __find_lock_page(inode, pgpos, hash);
                if (!page) {
-                       if (!page_cache) {
-                               page_cache = page_cache_alloc();
-                               if (page_cache)
+                       if (!cached_page) {
+                               cached_page = page_cache_alloc();
+                               if (cached_page)
                                        goto repeat_find;
                                status = -ENOMEM;
                                break;
                        }
-                       page = page_cache_entry(page_cache);
+                       page = cached_page;
                        if (add_to_page_cache_unique(page,inode,pgpos,hash))
                                goto repeat_find;
 
-                       page_cache = 0;
+                       cached_page = NULL;
                }
 
                /* We have exclusive IO access to the page.. */
@@ -1870,8 +1872,8 @@ repeat_find:
        }
        *ppos = pos;
 
-       if (page_cache)
-               page_cache_free(page_cache);
+       if (cached_page)
+               page_cache_free(cached_page);
 
        err = written ? written : status;
 out:
@@ -1897,11 +1899,11 @@ void put_cached_page(unsigned long addr)
        page_cache_release(page);
 }
 
-void __init page_cache_init(unsigned long memory_size)
+void __init page_cache_init(unsigned long mempages)
 {
        unsigned long htable_size, order;
 
-       htable_size = memory_size >> PAGE_SHIFT;
+       htable_size = mempages;
        htable_size *= sizeof(struct page *);
        for(order = 0; (PAGE_SIZE << order) < htable_size; order++)
                ;
@@ -1921,5 +1923,5 @@ void __init page_cache_init(unsigned long memory_size)
               (1 << page_hash_bits), order, (PAGE_SIZE << order));
        if (!page_hash_table)
                panic("Failed to allocate page hash table\n");
-       memset(page_hash_table, 0, PAGE_HASH_SIZE * sizeof(struct page *));
+       memset((void *)page_hash_table, 0, PAGE_HASH_SIZE * sizeof(struct page *));
 }
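
page_cache_init() now receives the number of page frames rather than a byte count, and sizes the page hash table at one pointer slot per frame, rounded up to the smallest power-of-two number of pages that fits. A standalone sketch of that sizing computation; the 4 KB page size matches i386, and the memory size is just an example:

    /* Sketch of the sizing loop in page_cache_init(): one pointer per
     * physical page, rounded up to 2^order pages of table. */
    #include <stdio.h>

    #define PAGE_SIZE 4096UL

    int main(void)
    {
            unsigned long mempages = 32768;             /* 128 MB of 4 KB pages */
            unsigned long htable_size, order;

            htable_size = mempages * sizeof(void *);    /* sizeof(struct page *) */
            for (order = 0; (PAGE_SIZE << order) < htable_size; order++)
                    ;

            printf("order %lu: %lu bytes of hash table for %lu pages\n",
                   order, PAGE_SIZE << order, mempages);
            return 0;
    }
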
diff --git a/mm/highmem.c b/mm/highmem.c
new file mode 100644 (file)
index 0000000..7665393
--- /dev/null
@@ -0,0 +1,81 @@
+/*
+ * High memory handling common code and variables.
+ *
+ * (C) 1999 Andrea Arcangeli, SuSE GmbH, andrea@suse.de
+ *          Gerhard Wichert, Siemens AG, Gerhard.Wichert@pdb.siemens.de
+ *
+ * Redesigned the x86 32-bit VM architecture to deal with
+ * 64-bit physical space. With current x86 CPUs this
+ * means up to 64 Gigabytes physical RAM.
+ *
+ * Copyright (C) 1999 Ingo Molnar <mingo@redhat.com>
+ */
+
+#include <linux/mm.h>
+#include <linux/pagemap.h>
+#include <linux/highmem.h>
+
+unsigned long highmem_mapnr;
+unsigned long nr_free_highpages = 0;
+
+struct page * prepare_highmem_swapout(struct page * page)
+{
+       unsigned long regular_page;
+       unsigned long vaddr;
+       /*
+        * A highmem page can't be swapped out directly,
+        * otherwise the b_data buffer addresses would break
+        * the low-level device drivers.
+        */
+       if (!PageHighMem(page))
+               return page;
+
+       regular_page = __get_free_page(GFP_ATOMIC);
+       if (!regular_page)
+               return NULL;
+
+       vaddr = kmap(page, KM_READ);
+       copy_page((void *)regular_page, (void *)vaddr);
+       kunmap(vaddr, KM_READ);
+
+       /*
+        * ok, we can just forget about our highmem page since 
+        * we stored its data into the new regular_page.
+        */
+       __free_page(page);
+
+       return mem_map + MAP_NR(regular_page);
+}
+
+struct page * replace_with_highmem(struct page * page)
+{
+       struct page *highpage;
+       unsigned long vaddr;
+
+       if (PageHighMem(page) || !nr_free_highpages)
+               return page;
+
+       highpage = get_free_highpage(GFP_ATOMIC|__GFP_HIGHMEM);
+       if (!highpage)
+               return page;
+       if (!PageHighMem(highpage)) {
+               __free_page(highpage);
+               return page;
+       }
+
+       vaddr = kmap(highpage, KM_WRITE);
+       copy_page((void *)vaddr, (void *)page_address(page));
+       kunmap(vaddr, KM_WRITE);
+
+       /* Preserve the caching of the swap_entry. */
+       highpage->offset = page->offset;
+       highpage->inode = page->inode;
+
+       /*
+        * We can just forget the old page since 
+        * we stored its data into the new highmem-page.
+        */
+       __free_page(page);
+
+       return highpage;
+}
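
prepare_highmem_swapout() above bounces a highmem page into a freshly allocated low page before it reaches the block layer, because the drivers dereference b_data as a kernel-virtual address that a highmem page does not have; replace_with_highmem() performs the inverse move after swap-in. A rough userspace mock of the bounce pattern, with kmap()/kunmap() replaced by trivial stand-ins (in the mock every page has a usable address, which is exactly what real highmem pages lack):

    /* Mock of the "bounce through low memory before swapout" pattern. */
    #include <stdio.h>
    #include <stdlib.h>
    #include <string.h>

    #define PAGE_SIZE 4096

    struct mock_page { void *data; int highmem; };

    static void *kmap_mock(struct mock_page *p)  { return p->data; }
    static void  kunmap_mock(void *vaddr)        { (void)vaddr; }

    /* Counterpart of prepare_highmem_swapout(): if the page is "high",
     * copy it into a page the drivers can address, then free the original. */
    static struct mock_page *bounce_for_swapout(struct mock_page *page)
    {
            struct mock_page *low;
            void *vaddr;

            if (!page->highmem)
                    return page;

            low = malloc(sizeof(*low));
            if (!low)
                    return NULL;
            low->data = malloc(PAGE_SIZE);
            if (!low->data) {
                    free(low);
                    return NULL;
            }
            low->highmem = 0;

            vaddr = kmap_mock(page);
            memcpy(low->data, vaddr, PAGE_SIZE);
            kunmap_mock(vaddr);

            free(page->data);                   /* __free_page() in the patch */
            free(page);
            return low;
    }

    int main(void)
    {
            struct mock_page *p = malloc(sizeof(*p));
            p->data = calloc(1, PAGE_SIZE);
            p->highmem = 1;

            p = bounce_for_swapout(p);
            if (!p)
                    return 1;
            printf("page for swapout is %s memory\n", p->highmem ? "high" : "low");

            free(p->data);
            free(p);
            return 0;
    }
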
index 5498dbcf03615b9aa468f07da2593ce50a9f2374..88979392828656b6147836f90b4f563c95961a31 100644 (file)
@@ -43,7 +43,7 @@
 #include <linux/smp_lock.h>
 #include <linux/swapctl.h>
 #include <linux/iobuf.h>
-#include <linux/bigmem.h>
+#include <linux/highmem.h>
 
 #include <asm/uaccess.h>
 #include <asm/pgtable.h>
 unsigned long max_mapnr = 0;
 unsigned long num_physpages = 0;
 void * high_memory = NULL;
+struct page *highmem_start_page;
 
 /*
  * We special-case the C-O-W ZERO_PAGE, because it's such
  * a common occurrence (no need to read the page to know
  * that it's zero - better for the cache and memory subsystem).
  */
-static inline void copy_cow_page(unsigned long from, unsigned long to)
+static inline void copy_cow_page(struct page * from, struct page * to)
 {
        if (from == ZERO_PAGE(to)) {
-               clear_bigpage(to);
+               clear_highpage(to);
                return;
        }
-       copy_bigpage(to, from);
+       copy_highpage(to, from);
 }
 
 mem_map_t * mem_map = NULL;
@@ -89,7 +90,7 @@ static inline void free_one_pmd(pmd_t * dir)
        if (pmd_none(*dir))
                return;
        if (pmd_bad(*dir)) {
-               printk("free_one_pmd: bad directory entry %08lx\n", pmd_val(*dir));
+               pmd_ERROR(*dir);
                pmd_clear(dir);
                return;
        }
@@ -106,7 +107,7 @@ static inline void free_one_pgd(pgd_t * dir)
        if (pgd_none(*dir))
                return;
        if (pgd_bad(*dir)) {
-               printk("free_one_pgd: bad directory entry %08lx\n", pgd_val(*dir));
+               pgd_ERROR(*dir);
                pgd_clear(dir);
                return;
        }
@@ -179,11 +180,10 @@ int copy_page_range(struct mm_struct *dst, struct mm_struct *src,
                if (pgd_none(*src_pgd))
                        goto skip_copy_pmd_range;
                if (pgd_bad(*src_pgd)) {
-                       printk("copy_pmd_range: bad pgd (%08lx)\n", 
-                               pgd_val(*src_pgd));
+                       pgd_ERROR(*src_pgd);
                        pgd_clear(src_pgd);
 skip_copy_pmd_range:   address = (address + PGDIR_SIZE) & PGDIR_MASK;
-                       if (address >= end)
+                       if (!address || (address >= end))
                                goto out;
                        continue;
                }
@@ -203,7 +203,7 @@ skip_copy_pmd_range:        address = (address + PGDIR_SIZE) & PGDIR_MASK;
                        if (pmd_none(*src_pmd))
                                goto skip_copy_pte_range;
                        if (pmd_bad(*src_pmd)) {
-                               printk("copy_pte_range: bad pmd (%08lx)\n", pmd_val(*src_pmd));
+                               pmd_ERROR(*src_pmd);
                                pmd_clear(src_pmd);
 skip_copy_pte_range:           address = (address + PMD_SIZE) & PMD_MASK;
                                if (address >= end)
@@ -227,11 +227,11 @@ skip_copy_pte_range:              address = (address + PMD_SIZE) & PMD_MASK;
                                if (pte_none(pte))
                                        goto cont_copy_pte_range;
                                if (!pte_present(pte)) {
-                                       swap_duplicate(pte_val(pte));
+                                       swap_duplicate(pte);
                                        set_pte(dst_pte, pte);
                                        goto cont_copy_pte_range;
                                }
-                               page_nr = MAP_NR(pte_page(pte));
+                               page_nr = pte_pagenr(pte);
                                if (page_nr >= max_mapnr || 
                                    PageReserved(mem_map+page_nr)) {
                                        set_pte(dst_pte, pte);
@@ -272,17 +272,17 @@ nomem:
 static inline int free_pte(pte_t page)
 {
        if (pte_present(page)) {
-               unsigned long addr = pte_page(page);
-               if (MAP_NR(addr) >= max_mapnr || PageReserved(mem_map+MAP_NR(addr)))
+               unsigned long nr = pte_pagenr(page);
+               if (nr >= max_mapnr || PageReserved(mem_map+nr))
                        return 0;
                /* 
                 * free_page() used to be able to clear swap cache
                 * entries.  We may now have to do it manually.  
                 */
-               free_page_and_swap_cache(addr);
+               free_page_and_swap_cache(mem_map+nr);
                return 1;
        }
-       swap_free(pte_val(page));
+       swap_free(page);
        return 0;
 }
 
@@ -302,7 +302,7 @@ static inline int zap_pte_range(struct mm_struct *mm, pmd_t * pmd, unsigned long
        if (pmd_none(*pmd))
                return 0;
        if (pmd_bad(*pmd)) {
-               printk("zap_pte_range: bad pmd (%08lx)\n", pmd_val(*pmd));
+               pmd_ERROR(*pmd);
                pmd_clear(pmd);
                return 0;
        }
@@ -336,7 +336,7 @@ static inline int zap_pmd_range(struct mm_struct *mm, pgd_t * dir, unsigned long
        if (pgd_none(*dir))
                return 0;
        if (pgd_bad(*dir)) {
-               printk("zap_pmd_range: bad pgd (%08lx)\n", pgd_val(*dir));
+               pgd_ERROR(*dir);
                pgd_clear(dir);
                return 0;
        }
@@ -372,12 +372,14 @@ void zap_page_range(struct mm_struct *mm, unsigned long address, unsigned long s
         * even if kswapd happened to be looking at this
         * process we _want_ it to get stuck.
         */
+       if (address >= end)
+               BUG();
        spin_lock(&mm->page_table_lock);
-       while (address < end) {
+       do {
                freed += zap_pmd_range(mm, dir, address, end - address);
                address = (address + PGDIR_SIZE) & PGDIR_MASK;
                dir++;
-       }
+       } while (address && (address < end));
        spin_unlock(&mm->page_table_lock);
        /*
         * Update rss for the mm_struct (not necessarily current->mm)
@@ -393,7 +395,7 @@ void zap_page_range(struct mm_struct *mm, unsigned long address, unsigned long s
 /*
  * Do a quick page-table lookup for a single page. 
  */
-static unsigned long follow_page(unsigned long address) 
+static struct page * follow_page(unsigned long address) 
 {
        pgd_t *pgd;
        pmd_t *pmd;
@@ -402,31 +404,27 @@ static unsigned long follow_page(unsigned long address)
        pmd = pmd_offset(pgd, address);
        if (pmd) {
                pte_t * pte = pte_offset(pmd, address);
-               if (pte && pte_present(*pte)) {
+               if (pte && pte_present(*pte))
                        return pte_page(*pte);
-               }
        }
        
        printk(KERN_ERR "Missing page in follow_page\n");
-       return 0;
+       return NULL;
 }
 
 /* 
  * Given a physical address, is there a useful struct page pointing to it?
  */
 
-static struct page * get_page_map(unsigned long page)
+struct page * get_page_map(struct page *page)
 {
-       struct page *map;
-       
        if (MAP_NR(page) >= max_mapnr)
                return 0;
        if (page == ZERO_PAGE(page))
                return 0;
-       map = mem_map + MAP_NR(page);
-       if (PageReserved(map))
+       if (PageReserved(page))
                return 0;
-       return map;
+       return page;
 }
 
 /*
@@ -441,7 +439,6 @@ int map_user_kiobuf(int rw, struct kiobuf *iobuf, unsigned long va, size_t len)
        int                     err;
        struct mm_struct *      mm;
        struct vm_area_struct * vma = 0;
-       unsigned long           page;
        struct page *           map;
        int                     doublepage = 0;
        int                     repeat = 0;
@@ -482,13 +479,12 @@ int map_user_kiobuf(int rw, struct kiobuf *iobuf, unsigned long va, size_t len)
                if (handle_mm_fault(current, vma, ptr, (rw==READ)) <= 0) 
                        goto out_unlock;
                spin_lock(&mm->page_table_lock);
-               page = follow_page(ptr);
-               if (!page) {
+               map = follow_page(ptr);
+               if (!map) {
                        dprintk (KERN_ERR "Missing page in map_user_kiobuf\n");
-                       map = NULL;
                        goto retry;
                }
-               map = get_page_map(page);
+               map = get_page_map(map);
                if (map) {
                        if (TryLockPage(map)) {
                                goto retry;
@@ -496,8 +492,6 @@ int map_user_kiobuf(int rw, struct kiobuf *iobuf, unsigned long va, size_t len)
                        atomic_inc(&map->count);
                }
                spin_unlock(&mm->page_table_lock);
-               dprintk ("Installing page %p %p: %d\n", (void *)page, map, i);
-               iobuf->pagelist[i] = page;
                iobuf->maplist[i] = map;
                iobuf->nr_pages = ++i;
                
@@ -585,14 +579,13 @@ static inline void zeromap_pte_range(pte_t * pte, unsigned long address,
        if (end > PMD_SIZE)
                end = PMD_SIZE;
        do {
-               pte_t zero_pte = pte_wrprotect(mk_pte(ZERO_PAGE(address),
-                                              prot));
+               pte_t zero_pte = pte_wrprotect(mk_pte(ZERO_PAGE(address), prot));
                pte_t oldpage = *pte;
                set_pte(pte, zero_pte);
                forget_pte(oldpage);
                address += PAGE_SIZE;
                pte++;
-       } while (address < end);
+       } while (address && (address < end));
 }
 
 static inline int zeromap_pmd_range(pmd_t * pmd, unsigned long address,
@@ -611,7 +604,7 @@ static inline int zeromap_pmd_range(pmd_t * pmd, unsigned long address,
                zeromap_pte_range(pte, address, end - address, prot);
                address = (address + PMD_SIZE) & PMD_MASK;
                pmd++;
-       } while (address < end);
+       } while (address && (address < end));
        return 0;
 }
 
@@ -624,7 +617,9 @@ int zeromap_page_range(unsigned long address, unsigned long size, pgprot_t prot)
 
        dir = pgd_offset(current->mm, address);
        flush_cache_range(current->mm, beg, end);
-       while (address < end) {
+       if (address >= end)
+               BUG();
+       do {
                pmd_t *pmd = pmd_alloc(dir, address);
                error = -ENOMEM;
                if (!pmd)
@@ -634,7 +629,7 @@ int zeromap_page_range(unsigned long address, unsigned long size, pgprot_t prot)
                        break;
                address = (address + PGDIR_SIZE) & PGDIR_MASK;
                dir++;
-       }
+       } while (address && (address < end));
        flush_tlb_range(current->mm, beg, end);
        return error;
 }
@@ -665,7 +660,7 @@ static inline void remap_pte_range(pte_t * pte, unsigned long address, unsigned
                address += PAGE_SIZE;
                phys_addr += PAGE_SIZE;
                pte++;
-       } while (address < end);
+       } while (address && (address < end));
 }
 
 static inline int remap_pmd_range(pmd_t * pmd, unsigned long address, unsigned long size,
@@ -685,7 +680,7 @@ static inline int remap_pmd_range(pmd_t * pmd, unsigned long address, unsigned l
                remap_pte_range(pte, address, end - address, address + phys_addr, prot);
                address = (address + PMD_SIZE) & PMD_MASK;
                pmd++;
-       } while (address < end);
+       } while (address && (address < end));
        return 0;
 }
 
@@ -699,7 +694,9 @@ int remap_page_range(unsigned long from, unsigned long phys_addr, unsigned long
        phys_addr -= from;
        dir = pgd_offset(current->mm, from);
        flush_cache_range(current->mm, beg, end);
-       while (from < end) {
+       if (from >= end)
+               BUG();
+       do {
                pmd_t *pmd = pmd_alloc(dir, from);
                error = -ENOMEM;
                if (!pmd)
@@ -709,7 +706,7 @@ int remap_page_range(unsigned long from, unsigned long phys_addr, unsigned long
                        break;
                from = (from + PGDIR_SIZE) & PGDIR_MASK;
                dir++;
-       }
+       } while (from && (from < end));
        flush_tlb_range(current->mm, beg, end);
        return error;
 }
@@ -718,37 +715,35 @@ int remap_page_range(unsigned long from, unsigned long phys_addr, unsigned long
  * This routine is used to map in a page into an address space: needed by
  * execve() for the initial stack and environment pages.
  */
-unsigned long put_dirty_page(struct task_struct * tsk, unsigned long page, unsigned long address)
+struct page * put_dirty_page(struct task_struct * tsk, struct page *page,
+                                                unsigned long address)
 {
        pgd_t * pgd;
        pmd_t * pmd;
        pte_t * pte;
 
-       if (MAP_NR(page) >= max_mapnr)
-               printk("put_dirty_page: trying to put page %08lx at %08lx\n",page,address);
-       if (page_count(mem_map + MAP_NR(page)) != 1)
-               printk("mem_map disagrees with %08lx at %08lx\n",page,address);
-       pgd = pgd_offset(tsk->mm,address);
+       if (page_count(page) != 1)
+               printk("mem_map disagrees with %p at %08lx\n", page, address);
+       pgd = pgd_offset(tsk->mm, address);
        pmd = pmd_alloc(pgd, address);
        if (!pmd) {
-               free_page(page);
+               __free_page(page);
                oom(tsk);
                return 0;
        }
        pte = pte_alloc(pmd, address);
        if (!pte) {
-               free_page(page);
+               __free_page(page);
                oom(tsk);
                return 0;
        }
        if (!pte_none(*pte)) {
-               printk("put_dirty_page: pte %08lx already exists\n",
-                      pte_val(*pte));
-               free_page(page);
+               pte_ERROR(*pte);
+               __free_page(page);
                return 0;
        }
-       flush_page_to_ram(page);
-       set_pte(pte, pte_mkwrite(pte_mkdirty(mk_pte(page, PAGE_COPY))));
+       flush_page_to_ram(pte_page(page));
+       set_pte(pte, pte_mkwrite(page_pte_prot(page, PAGE_COPY)));
 /* no need for flush_tlb */
        return page;
 }
@@ -776,14 +771,14 @@ unsigned long put_dirty_page(struct task_struct * tsk, unsigned long page, unsig
 static int do_wp_page(struct task_struct * tsk, struct vm_area_struct * vma,
        unsigned long address, pte_t *page_table, pte_t pte)
 {
-       unsigned long old_page, new_page;
-       struct page * page;
+       unsigned long map_nr;
+       struct page *old_page, *new_page;
 
-       old_page = pte_page(pte);
-       if (MAP_NR(old_page) >= max_mapnr)
+       map_nr = pte_pagenr(pte);
+       if (map_nr >= max_mapnr)
                goto bad_wp_page;
        tsk->min_flt++;
-       page = mem_map + MAP_NR(old_page);
+       old_page = mem_map + map_nr;
        
        /*
         * We can avoid the copy if:
@@ -793,13 +788,13 @@ static int do_wp_page(struct task_struct * tsk, struct vm_area_struct * vma,
         *   in which case we can remove the page
         *   from the swap cache.
         */
-       switch (page_count(page)) {
+       switch (page_count(old_page)) {
        case 2:
-               if (!PageSwapCache(page))
+               if (!PageSwapCache(old_page))
                        break;
-               if (swap_count(page->offset) != 1)
+               if (swap_count(old_page) != 1)
                        break;
-               delete_from_swap_cache(page);
+               delete_from_swap_cache(old_page);
                /* FallThrough */
        case 1:
                flush_cache_page(vma, address);
@@ -813,7 +808,7 @@ static int do_wp_page(struct task_struct * tsk, struct vm_area_struct * vma,
         * Ok, we need to copy. Oh, well..
         */
        spin_unlock(&tsk->mm->page_table_lock);
-       new_page = __get_free_page(GFP_BIGUSER);
+       new_page = get_free_highpage(GFP_HIGHUSER);
        if (!new_page)
                return -1;
        spin_lock(&tsk->mm->page_table_lock);
@@ -822,9 +817,9 @@ static int do_wp_page(struct task_struct * tsk, struct vm_area_struct * vma,
         * Re-check the pte - we dropped the lock
         */
        if (pte_val(*page_table) == pte_val(pte)) {
-               if (PageReserved(page))
+               if (PageReserved(old_page))
                        ++vma->vm_mm->rss;
-               copy_cow_page(old_page,new_page);
+               copy_cow_page(old_page, new_page);
                flush_page_to_ram(new_page);
                flush_cache_page(vma, address);
                set_pte(page_table, pte_mkwrite(pte_mkdirty(mk_pte(new_page, vma->vm_page_prot))));
@@ -834,12 +829,12 @@ static int do_wp_page(struct task_struct * tsk, struct vm_area_struct * vma,
                new_page = old_page;
        }
        spin_unlock(&tsk->mm->page_table_lock);
-       free_page(new_page);
+       __free_page(new_page);
        return 1;
 
 bad_wp_page:
        spin_unlock(&tsk->mm->page_table_lock);
-       printk("do_wp_page: bogus page at address %08lx (%08lx)\n",address,old_page);
+       printk("do_wp_page: bogus page at address %08lx (nr %ld)\n",address,map_nr);
        return -1;
 }
 
@@ -848,6 +843,8 @@ bad_wp_page:
  */
 static void partial_clear(struct vm_area_struct *vma, unsigned long address)
 {
+       unsigned int offset;
+       struct page *page;
        pgd_t *page_dir;
        pmd_t *page_middle;
        pte_t *page_table, pte;
@@ -856,7 +853,7 @@ static void partial_clear(struct vm_area_struct *vma, unsigned long address)
        if (pgd_none(*page_dir))
                return;
        if (pgd_bad(*page_dir)) {
-               printk("bad page table directory entry %p:[%lx]\n", page_dir, pgd_val(*page_dir));
+               pgd_ERROR(*page_dir);
                pgd_clear(page_dir);
                return;
        }
@@ -864,7 +861,7 @@ static void partial_clear(struct vm_area_struct *vma, unsigned long address)
        if (pmd_none(*page_middle))
                return;
        if (pmd_bad(*page_middle)) {
-               printk("bad page table directory entry %p:[%lx]\n", page_dir, pgd_val(*page_dir));
+               pmd_ERROR(*page_middle);
                pmd_clear(page_middle);
                return;
        }
@@ -873,12 +870,11 @@ static void partial_clear(struct vm_area_struct *vma, unsigned long address)
        if (!pte_present(pte))
                return;
        flush_cache_page(vma, address);
-       address &= ~PAGE_MASK;
-       address += pte_page(pte);
-       if (MAP_NR(address) >= max_mapnr)
+       page = pte_page(pte);
+       if (page-mem_map >= max_mapnr)
                return;
-       memset((void *) address, 0, PAGE_SIZE - (address & ~PAGE_MASK));
-       flush_page_to_ram(pte_page(pte));
+       offset = address & ~PAGE_MASK;
+       memclear_highpage_flush(page, offset, PAGE_SIZE - offset);
 }
 
 /*
@@ -939,7 +935,7 @@ out_unlock:
  * because it doesn't cost us any seek time.  We also make sure to queue
  * the 'original' request together with the readahead ones...  
  */
-void swapin_readahead(unsigned long entry)
+void swapin_readahead(pte_t entry)
 {
        int i;
        struct page *new_page;
@@ -973,7 +969,7 @@ void swapin_readahead(unsigned long entry)
 
 static int do_swap_page(struct task_struct * tsk,
        struct vm_area_struct * vma, unsigned long address,
-       pte_t * page_table, unsigned long entry, int write_access)
+       pte_t * page_table, pte_t entry, int write_access)
 {
        struct page *page = lookup_swap_cache(entry);
        pte_t pte;
@@ -986,7 +982,7 @@ static int do_swap_page(struct task_struct * tsk,
                if (!page)
                        return -1;
 
-               flush_page_to_ram(page_address(page));
+               flush_page_to_ram(page);
        }
 
        vma->vm_mm->rss++;
@@ -995,13 +991,13 @@ static int do_swap_page(struct task_struct * tsk,
        swap_free(entry);
        unlock_kernel();
 
-       pte = mk_pte(page_address(page), vma->vm_page_prot);
+       pte = mk_pte(page, vma->vm_page_prot);
 
        set_bit(PG_swap_entry, &page->flags);
        if (write_access && !is_page_shared(page)) {
                delete_from_swap_cache(page);
-               page = replace_with_bigmem(page);
-               pte = mk_pte(page_address(page), vma->vm_page_prot);
+               page = replace_with_highmem(page);
+               pte = mk_pte(page, vma->vm_page_prot);
                pte = pte_mkwrite(pte_mkdirty(pte));
        }
        set_pte(page_table, pte);
@@ -1015,12 +1011,16 @@ static int do_swap_page(struct task_struct * tsk,
  */
 static int do_anonymous_page(struct task_struct * tsk, struct vm_area_struct * vma, pte_t *page_table, int write_access, unsigned long addr)
 {
+       int high = 0;
+       struct page *page = NULL;
        pte_t entry = pte_wrprotect(mk_pte(ZERO_PAGE(addr), vma->vm_page_prot));
        if (write_access) {
-               unsigned long page = __get_free_page(GFP_BIGUSER);
+               page = get_free_highpage(GFP_HIGHUSER);
                if (!page)
                        return -1;
-               clear_bigpage(page);
+               if (PageHighMem(page))
+                       high = 1;
+               clear_highpage(page);
                entry = pte_mkwrite(pte_mkdirty(mk_pte(page, vma->vm_page_prot)));
                vma->vm_mm->rss++;
                tsk->min_flt++;
@@ -1047,7 +1047,7 @@ static int do_anonymous_page(struct task_struct * tsk, struct vm_area_struct * v
 static int do_no_page(struct task_struct * tsk, struct vm_area_struct * vma,
        unsigned long address, int write_access, pte_t *page_table)
 {
-       unsigned long page;
+       struct page * new_page;
        pte_t entry;
 
        if (!vma->vm_ops || !vma->vm_ops->nopage)
@@ -1058,12 +1058,11 @@ static int do_no_page(struct task_struct * tsk, struct vm_area_struct * vma,
         * to copy, not share the page even if sharing is possible.  It's
         * essentially an early COW detection.
         */
-       page = vma->vm_ops->nopage(vma, address & PAGE_MASK, (vma->vm_flags & VM_SHARED)?0:write_access);
-       if (!page)
+       new_page = vma->vm_ops->nopage(vma, address & PAGE_MASK, (vma->vm_flags & VM_SHARED)?0:write_access);
+       if (!new_page)
                return 0;       /* SIGBUS - but we _really_ should know whether it is OOM or SIGBUS */
-       if (page == -1)
+       if (new_page == (struct page *)-1)
                return -1;      /* OOM */
-
        ++tsk->maj_flt;
        ++vma->vm_mm->rss;
        /*
@@ -1076,11 +1075,11 @@ static int do_no_page(struct task_struct * tsk, struct vm_area_struct * vma,
         * so we can make it writable and dirty to avoid having to
         * handle that later.
         */
-       flush_page_to_ram(page);
-       entry = mk_pte(page, vma->vm_page_prot);
+       flush_page_to_ram(new_page);
+       entry = mk_pte(new_page, vma->vm_page_prot);
        if (write_access) {
                entry = pte_mkwrite(pte_mkdirty(entry));
-       } else if (page_count(mem_map+MAP_NR(page)) > 1 &&
+       } else if (page_count(new_page) > 1 &&
                   !(vma->vm_flags & VM_SHARED))
                entry = pte_wrprotect(entry);
        set_pte(page_table, entry);
@@ -1117,7 +1116,7 @@ static inline int handle_pte_fault(struct task_struct *tsk,
        if (!pte_present(entry)) {
                if (pte_none(entry))
                        return do_no_page(tsk, vma, address, write_access, pte);
-               return do_swap_page(tsk, vma, address, pte, pte_val(entry), write_access);
+               return do_swap_page(tsk, vma, address, pte, entry, write_access);
        }
 
        /*
@@ -1148,17 +1147,19 @@ static inline int handle_pte_fault(struct task_struct *tsk,
 int handle_mm_fault(struct task_struct *tsk, struct vm_area_struct * vma,
        unsigned long address, int write_access)
 {
+       int ret = -1;
        pgd_t *pgd;
        pmd_t *pmd;
 
        pgd = pgd_offset(vma->vm_mm, address);
        pmd = pmd_alloc(pgd, address);
+       
        if (pmd) {
                pte_t * pte = pte_alloc(pmd, address);
                if (pte)
-                       return handle_pte_fault(tsk, vma, address, write_access, pte);
+                       ret = handle_pte_fault(tsk, vma, address, write_access, pte);
        }
-       return -1;
+       return ret;
 }
 
 /*
@@ -1172,10 +1173,12 @@ int make_pages_present(unsigned long addr, unsigned long end)
 
        vma = find_vma(tsk->mm, addr);
        write = (vma->vm_flags & VM_WRITE) != 0;
-       while (addr < end) {
+       if (addr >= end)
+               BUG();
+       do {
                if (handle_mm_fault(tsk, vma, addr, write) < 0)
                        return -1;
                addr += PAGE_SIZE;
-       }
+       } while (addr < end);
        return 0;
 }
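
The do_wp_page() rework above keeps the existing copy-on-write fast path, just expressed in terms of struct page: a page with a count of 1 is made writable in place, a swap-cached page whose only extra reference is its swap-cache entry can drop that entry and be reused, and only genuinely shared pages are copied into a freshly allocated (possibly highmem) page. A condensed restatement of that decision, with the locking and pte manipulation stripped out and the names invented for the example:

    /* Illustrative condensation of the do_wp_page() switch statement. */
    #include <stdio.h>

    enum cow_action { COW_REUSE_IN_PLACE, COW_COPY_PAGE };

    struct mock_page { int count; int swap_cached; int swap_count; };

    static enum cow_action wp_decision(struct mock_page *page)
    {
            switch (page->count) {
            case 2:
                    if (!page->swap_cached)
                            break;
                    if (page->swap_count != 1)
                            break;
                    /* delete_from_swap_cache() drops the extra reference */
                    page->count = 1;
                    page->swap_cached = 0;
                    /* fall through */
            case 1:
                    return COW_REUSE_IN_PLACE;  /* just mark the pte writable   */
            }
            return COW_COPY_PAGE;               /* allocate and copy_cow_page() */
    }

    int main(void)
    {
            struct mock_page private_page = { 1, 0, 0 };
            struct mock_page shared_page  = { 3, 0, 0 };

            printf("count 1: %s\n", wp_decision(&private_page) == COW_REUSE_IN_PLACE
                   ? "reuse" : "copy");
            printf("count 3: %s\n", wp_decision(&shared_page) == COW_REUSE_IN_PLACE
                   ? "reuse" : "copy");
            return 0;
    }
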
index 14413b3082e03c542edd2cf2f817c18a4e29fe4d..a42e9a4cc7ae142379efd57f4f8d869f4eb44dd2 100644 (file)
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -62,7 +62,7 @@ int vm_enough_memory(long pages)
        if (sysctl_overcommit_memory)
            return 1;
 
-       free = atomic_read(&buffermem) >> PAGE_SHIFT;
+       free = atomic_read(&buffermem_pages);
        free += atomic_read(&page_cache_size);
        free += nr_free_pages;
        free += nr_swap_pages;
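
The vm_enough_memory() hunk fixes the units of the buffer-cache term: the old atomic buffermem counted bytes and had to be shifted down by PAGE_SHIFT, while the new buffermem_pages already counts pages, so it is summed directly with the page cache, free pages, and free swap. A tiny sketch of the resulting estimate with made-up figures (the comparison at the end is a simplification, not the exact policy of the function):

    /* The "how much memory is effectively free" sum, all in pages. */
    #include <stdio.h>

    int main(void)
    {
            long buffermem_pages = 512;     /* buffer cache          */
            long page_cache_size = 4096;    /* page cache            */
            long nr_free_pages   = 1024;    /* free RAM              */
            long nr_swap_pages   = 8192;    /* free swap             */
            long request         = 2000;    /* pages being requested */

            long free = buffermem_pages + page_cache_size
                      + nr_free_pages + nr_swap_pages;

            printf("%ld pages considered free, request for %ld %s\n",
                   free, request, free > request ? "granted" : "refused");
            return 0;
    }
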
index de2fd6917fa1ee436092aab9e6e9fc533d965712..56454fc070991bd2783dd3984f1303a83e2f5089 100644 (file)
@@ -20,7 +20,7 @@ static inline void change_pte_range(pmd_t * pmd, unsigned long address,
        if (pmd_none(*pmd))
                return;
        if (pmd_bad(*pmd)) {
-               printk("change_pte_range: bad pmd (%08lx)\n", pmd_val(*pmd));
+               pmd_ERROR(*pmd);
                pmd_clear(pmd);
                return;
        }
@@ -35,7 +35,7 @@ static inline void change_pte_range(pmd_t * pmd, unsigned long address,
                        set_pte(pte, pte_modify(entry, newprot));
                address += PAGE_SIZE;
                pte++;
-       } while (address < end);
+       } while (address && (address < end));
 }
 
 static inline void change_pmd_range(pgd_t * pgd, unsigned long address,
@@ -47,7 +47,7 @@ static inline void change_pmd_range(pgd_t * pgd, unsigned long address,
        if (pgd_none(*pgd))
                return;
        if (pgd_bad(*pgd)) {
-               printk("change_pmd_range: bad pgd (%08lx)\n", pgd_val(*pgd));
+               pgd_ERROR(*pgd);
                pgd_clear(pgd);
                return;
        }
@@ -60,7 +60,7 @@ static inline void change_pmd_range(pgd_t * pgd, unsigned long address,
                change_pte_range(pmd, address, end - address, newprot);
                address = (address + PMD_SIZE) & PMD_MASK;
                pmd++;
-       } while (address < end);
+       } while (address && (address < end));
 }
 
 static void change_protection(unsigned long start, unsigned long end, pgprot_t newprot)
@@ -70,11 +70,13 @@ static void change_protection(unsigned long start, unsigned long end, pgprot_t n
 
        dir = pgd_offset(current->mm, start);
        flush_cache_range(current->mm, beg, end);
-       while (start < end) {
+       if (start >= end)
+               BUG();
+       do {
                change_pmd_range(dir, start, end - start, newprot);
                start = (start + PGDIR_SIZE) & PGDIR_MASK;
                dir++;
-       }
+       } while (start && (start < end));
        flush_tlb_range(current->mm, beg, end);
        return;
 }
index 101e513108d9c0600c9cb5f899174ab7e7e44043..b73996dc2eb3239e57f32bb7a50331c07c978944 100644 (file)
@@ -25,7 +25,7 @@ static inline pte_t *get_one_pte(struct mm_struct *mm, unsigned long addr)
        if (pgd_none(*pgd))
                goto end;
        if (pgd_bad(*pgd)) {
-               printk("move_one_page: bad source pgd (%08lx)\n", pgd_val(*pgd));
+               pgd_ERROR(*pgd);
                pgd_clear(pgd);
                goto end;
        }
@@ -34,7 +34,7 @@ static inline pte_t *get_one_pte(struct mm_struct *mm, unsigned long addr)
        if (pmd_none(*pmd))
                goto end;
        if (pmd_bad(*pmd)) {
-               printk("move_one_page: bad source pmd (%08lx)\n", pmd_val(*pmd));
+               pmd_ERROR(*pmd);
                pmd_clear(pmd);
                goto end;
        }
index b62783c723323564de553cdcbb8a8c00a278e689..772a30057abae33220c9d8d011c88731634beac4 100644 (file)
@@ -14,7 +14,8 @@
 #include <linux/interrupt.h>
 #include <linux/init.h>
 #include <linux/pagemap.h>
-#include <linux/bigmem.h> /* export bigmem vars */
+#include <linux/highmem.h>
+#include <linux/bootmem.h>
 
 #include <asm/dma.h>
 #include <asm/uaccess.h> /* for copy_to/from_user */
@@ -40,46 +41,18 @@ LIST_HEAD(lru_cache);
 #define NR_MEM_LISTS 10
 #endif
 
-/* The start of this MUST match the start of "struct page" */
 struct free_area_struct {
-       struct page *next;
-       struct page *prev;
+       struct list_head free_list;
        unsigned int * map;
 };
 
-#define memory_head(x) ((struct page *)(x))
-
-#ifdef CONFIG_BIGMEM
-#define BIGMEM_LISTS_OFFSET    NR_MEM_LISTS
+#ifdef CONFIG_HIGHMEM
+#define HIGHMEM_LISTS_OFFSET   NR_MEM_LISTS
 static struct free_area_struct free_area[NR_MEM_LISTS*2];
 #else
 static struct free_area_struct free_area[NR_MEM_LISTS];
 #endif
 
-static inline void init_mem_queue(struct free_area_struct * head)
-{
-       head->next = memory_head(head);
-       head->prev = memory_head(head);
-}
-
-static inline void add_mem_queue(struct free_area_struct * head, struct page * entry)
-{
-       struct page * next = head->next;
-
-       entry->prev = memory_head(head);
-       entry->next = next;
-       next->prev = entry;
-       head->next = entry;
-}
-
-static inline void remove_mem_queue(struct page * entry)
-{
-       struct page * next = entry->next;
-       struct page * prev = entry->prev;
-       next->prev = prev;
-       prev->next = next;
-}
-
 /*
  * Free_page() adds the page to the free lists. This is optimized for
  * fast normal cases (no error jumps taken normally).
@@ -99,41 +72,67 @@ static inline void remove_mem_queue(struct page * entry)
  */
 spinlock_t page_alloc_lock = SPIN_LOCK_UNLOCKED;
 
+#define memlist_init(x) INIT_LIST_HEAD(x)
+#define memlist_add_head list_add
+#define memlist_add_tail list_add_tail
+#define memlist_del list_del
+#define memlist_entry list_entry
+#define memlist_next(x) ((x)->next)
+#define memlist_prev(x) ((x)->prev)
+
 static inline void free_pages_ok(unsigned long map_nr, unsigned long order)
 {
        struct free_area_struct *area = free_area + order;
        unsigned long index = map_nr >> (1 + order);
        unsigned long mask = (~0UL) << order;
        unsigned long flags;
+       struct page *page, *buddy;
 
        spin_lock_irqsave(&page_alloc_lock, flags);
 
 #define list(x) (mem_map+(x))
 
-#ifdef CONFIG_BIGMEM
-       if (map_nr >= bigmem_mapnr) {
-               area += BIGMEM_LISTS_OFFSET;
-               nr_free_bigpages -= mask;
+#ifdef CONFIG_HIGHMEM
+       if (map_nr >= highmem_mapnr) {
+               area += HIGHMEM_LISTS_OFFSET;
+               nr_free_highpages -= mask;
        }
 #endif
        map_nr &= mask;
        nr_free_pages -= mask;
+
        while (mask + (1 << (NR_MEM_LISTS-1))) {
                if (!test_and_change_bit(index, area->map))
+                       /*
+                        * the buddy page is still allocated.
+                        */
                        break;
-               remove_mem_queue(list(map_nr ^ -mask));
+               /*
+                * Move the buddy up one level.
+                */
+               buddy = list(map_nr ^ -mask);
+               page = list(map_nr);
+
+               memlist_del(&buddy->list);
                mask <<= 1;
                area++;
                index >>= 1;
                map_nr &= mask;
        }
-       add_mem_queue(area, list(map_nr));
-
+       memlist_add_head(&(list(map_nr))->list, &area->free_list);
 #undef list
 
        spin_unlock_irqrestore(&page_alloc_lock, flags);
 }
 
+/*
+ * Some ugly macros to speed up __get_free_pages()..
+ */
+#define MARK_USED(index, order, area) \
+       change_bit((index) >> (1+(order)), (area)->map)
+#define CAN_DMA(x) (PageDMA(x))
+#define ADDRESS(x) (PAGE_OFFSET + ((x) << PAGE_SHIFT))
+
 int __free_page(struct page *page)
 {
        if (!PageReserved(page) && put_page_testzero(page)) {
@@ -142,7 +141,7 @@ int __free_page(struct page *page)
                if (PageLocked(page))
                        PAGE_BUG(page);
 
-               free_pages_ok(page - mem_map, 0);
+               free_pages_ok(page-mem_map, 0);
                return 1;
        }
        return 0;
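
The buddy allocator's free lists drop the hand-rolled next/prev pointers at the front of struct page in favour of an embedded struct list_head (page->list), wrapped by the memlist_* aliases above; memlist_entry() is list_entry(), which recovers the enclosing struct page from the embedded node. (mm/slab.c further down reuses the same two pointers as back-pointers to the cache and slab.) A standalone illustration of that container-of arithmetic with a stripped-down page structure:

    /* The list_entry() pointer arithmetic behind memlist_entry(). */
    #include <stdio.h>
    #include <stddef.h>

    struct list_head { struct list_head *next, *prev; };

    #define list_entry(ptr, type, member) \
            ((type *)((char *)(ptr) - offsetof(type, member)))

    struct mock_page { unsigned long flags; struct list_head list; };

    int main(void)
    {
            struct mock_page pg = { .flags = 42 };
            struct list_head *node = &pg.list;     /* what the free list stores */
            struct mock_page *back = list_entry(node, struct mock_page, list);

            printf("flags=%lu, recovered the same page: %s\n",
                   back->flags, back == &pg ? "yes" : "no");
            return 0;
    }
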
@@ -166,148 +165,146 @@ int free_pages(unsigned long addr, unsigned long order)
        return 0;
 }
 
-/*
- * Some ugly macros to speed up __get_free_pages()..
- */
-#define MARK_USED(index, order, area) \
-       change_bit((index) >> (1+(order)), (area)->map)
-#define CAN_DMA(x) (PageDMA(x))
-#define ADDRESS(x) (PAGE_OFFSET + ((x) << PAGE_SHIFT))
+static inline unsigned long EXPAND (struct page *map, unsigned long index,
+                int low, int high, struct free_area_struct * area)
+{
+       unsigned long size = 1 << high;
+
+       while (high > low) {
+               area--;
+               high--;
+               size >>= 1;
+               memlist_add_head(&(map)->list, &(area)->free_list);
+               MARK_USED(index, high, area);
+               index += size;
+               map += size;
+       }
+       set_page_count(map, 1);
+       return index;
+}
+
+static inline struct page * rmqueue (int order, int gfp_mask, int offset)
+{
+       struct free_area_struct * area = free_area+order+offset;
+       unsigned long curr_order = order, map_nr;
+       struct page *page;
+       struct list_head *head, *curr;
+
+       do {
+               head = &area->free_list;
+               curr = memlist_next(head);
+
+               while (curr != head) {
+                       page = memlist_entry(curr, struct page, list);
+                       if (!(gfp_mask & __GFP_DMA) || CAN_DMA(page)) {
+                               memlist_del(curr);
+                               map_nr = page - mem_map;        
+                               MARK_USED(map_nr, curr_order, area);
+                               nr_free_pages -= 1 << order;
+                               map_nr = EXPAND(page, map_nr, order, curr_order, area);
+                               page = mem_map + map_nr;
+                               return page;    
+                       }
+                       curr = memlist_next(curr);
+               }
+               curr_order++;
+               area++;
+       } while (curr_order < NR_MEM_LISTS);
 
-#ifdef CONFIG_BIGMEM
-#define RMQUEUEBIG(order, gfp_mask) \
-if (gfp_mask & __GFP_BIGMEM) { \
-       struct free_area_struct * area = free_area+order+BIGMEM_LISTS_OFFSET; \
-       unsigned long new_order = order; \
-       do { struct page *prev = memory_head(area), *ret = prev->next; \
-               if (memory_head(area) != ret) { \
-                       unsigned long map_nr; \
-                       (prev->next = ret->next)->prev = prev; \
-                       map_nr = ret - mem_map; \
-                       MARK_USED(map_nr, new_order, area); \
-                       nr_free_pages -= 1 << order; \
-                       nr_free_bigpages -= 1 << order; \
-                       EXPAND(ret, map_nr, order, new_order, area); \
-                       spin_unlock_irqrestore(&page_alloc_lock, flags); \
-                       return ADDRESS(map_nr); \
-               } \
-               new_order++; area++; \
-       } while (new_order < NR_MEM_LISTS); \
+       return NULL;
 }
+
+static inline int balance_lowmemory (int gfp_mask)
+{
+       int freed;
+       static int low_on_memory = 0;
+
+#ifndef CONFIG_HIGHMEM
+       if (nr_free_pages > freepages.min) {
+               if (!low_on_memory)
+                       return 1;
+               if (nr_free_pages >= freepages.high) {
+                       low_on_memory = 0;
+                       return 1;
+               }
+       }
+
+       low_on_memory = 1;
+#else
+       static int low_on_highmemory = 0;
+
+       if (gfp_mask & __GFP_HIGHMEM)
+       {
+               if (nr_free_pages > freepages.min) {
+                       if (!low_on_highmemory) {
+                               return 1;
+                       }
+                       if (nr_free_pages >= freepages.high) {
+                               low_on_highmemory = 0;
+                               return 1;
+                       }
+               }
+               low_on_highmemory = 1;
+       } else {
+               if (nr_free_pages+nr_free_highpages > freepages.min) {
+                       if (!low_on_memory) {
+                               return 1;
+                       }
+                       if (nr_free_pages+nr_free_highpages >= freepages.high) {
+                               low_on_memory = 0;
+                               return 1;
+                       }
+               }
+               low_on_memory = 1;
+       }
 #endif
+       current->flags |= PF_MEMALLOC;
+       freed = try_to_free_pages(gfp_mask);
+       current->flags &= ~PF_MEMALLOC;
 
-#define RMQUEUE(order, gfp_mask) \
-do { struct free_area_struct * area = free_area+order; \
-     unsigned long new_order = order; \
-       do { struct page *prev = memory_head(area), *ret = prev->next; \
-               while (memory_head(area) != ret) { \
-                       if (!(gfp_mask & __GFP_DMA) || CAN_DMA(ret)) { \
-                               unsigned long map_nr; \
-                               (prev->next = ret->next)->prev = prev; \
-                               map_nr = ret - mem_map; \
-                               MARK_USED(map_nr, new_order, area); \
-                               nr_free_pages -= 1 << order; \
-                               EXPAND(ret, map_nr, order, new_order, area); \
-                               spin_unlock_irqrestore(&page_alloc_lock,flags);\
-                               return ADDRESS(map_nr); \
-                       } \
-                       prev = ret; \
-                       ret = ret->next; \
-               } \
-               new_order++; area++; \
-       } while (new_order < NR_MEM_LISTS); \
-} while (0)
-
-#define EXPAND(map,index,low,high,area) \
-do { unsigned long size = 1 << high; \
-       while (high > low) { \
-               area--; high--; size >>= 1; \
-               add_mem_queue(area, map); \
-               MARK_USED(index, high, area); \
-               index += size; \
-               map += size; \
-       } \
-       set_page_count(map, 1); \
-} while (0)
+       if (!freed && !(gfp_mask & (__GFP_MED | __GFP_HIGH)))
+               return 0;
+       return 1;
+}
 
-unsigned long __get_free_pages(int gfp_mask, unsigned long order)
+struct page * __get_pages(int gfp_mask, unsigned long order)
 {
        unsigned long flags;
+       struct page *page;
 
        if (order >= NR_MEM_LISTS)
                goto nopage;
 
-#ifdef ATOMIC_MEMORY_DEBUGGING
-       if ((gfp_mask & __GFP_WAIT) && in_interrupt()) {
-               static int count = 0;
-               if (++count < 5) {
-                       printk("gfp called nonatomically from interrupt %p\n",
-                               __builtin_return_address(0));
-               }
-               goto nopage;
-       }
-#endif
+       /*
+        * If anyone calls gfp from interrupts nonatomically then it
+        * will sooner or later be tripped up by a schedule().
+        */
 
        /*
         * If this is a recursive call, we'd better
         * do our best to just allocate things without
         * further thought.
         */
-       if (!(current->flags & PF_MEMALLOC)) {
-               int freed;
-               static int low_on_memory = 0;
+       if (!(current->flags & PF_MEMALLOC))
+               goto lowmemory;
 
-#ifndef CONFIG_BIGMEM
-               if (nr_free_pages > freepages.min) {
-                       if (!low_on_memory)
-                               goto ok_to_allocate;
-                       if (nr_free_pages >= freepages.high) {
-                               low_on_memory = 0;
-                               goto ok_to_allocate;
-                       }
-               }
+ok_to_allocate:
+       spin_lock_irqsave(&page_alloc_lock, flags);
 
-               low_on_memory = 1;
-#else
-               static int low_on_bigmemory = 0;
-
-               if (gfp_mask & __GFP_BIGMEM)
-               {
-                       if (nr_free_pages > freepages.min) {
-                               if (!low_on_bigmemory)
-                                       goto ok_to_allocate;
-                               if (nr_free_pages >= freepages.high) {
-                                       low_on_bigmemory = 0;
-                                       goto ok_to_allocate;
-                               }
-                       }
-                       low_on_bigmemory = 1;
-               } else {
-                       if (nr_free_pages-nr_free_bigpages > freepages.min) {
-                               if (!low_on_memory)
-                                       goto ok_to_allocate;
-                               if (nr_free_pages-nr_free_bigpages >= freepages.high) {
-                                       low_on_memory = 0;
-                                       goto ok_to_allocate;
-                               }
-                       }
-                       low_on_memory = 1;
+#ifdef CONFIG_HIGHMEM
+       if (gfp_mask & __GFP_HIGHMEM) {
+               page = rmqueue(order, gfp_mask, HIGHMEM_LISTS_OFFSET);
+               if (page) {
+                       nr_free_highpages -= 1 << order;
+                       spin_unlock_irqrestore(&page_alloc_lock, flags);
+                       goto ret;
                }
-#endif
-               current->flags |= PF_MEMALLOC;
-               freed = try_to_free_pages(gfp_mask);
-               current->flags &= ~PF_MEMALLOC;
-
-               if (!freed && !(gfp_mask & (__GFP_MED | __GFP_HIGH)))
-                       goto nopage;
        }
-ok_to_allocate:
-       spin_lock_irqsave(&page_alloc_lock, flags);
-#ifdef CONFIG_BIGMEM
-       RMQUEUEBIG(order, gfp_mask);
 #endif
-       RMQUEUE(order, gfp_mask);
+       page = rmqueue(order, gfp_mask, 0);
        spin_unlock_irqrestore(&page_alloc_lock, flags);
+       if (page)
+               goto ret;
 
        /*
         * If we can schedule, do so, and make sure to yield.
@@ -320,7 +317,28 @@ ok_to_allocate:
        }
 
 nopage:
-       return 0;
+       return NULL;
+
+lowmemory:
+       if (balance_lowmemory(gfp_mask))
+               goto ok_to_allocate;
+       goto nopage;
+ret:
+       return page;
+}
+
+unsigned long __get_free_pages(int gfp_mask, unsigned long order)
+{
+       struct page *page;
+       page = __get_pages(gfp_mask, order);
+       if (!page)
+               return 0;
+       return page_address(page);
+}
+
+struct page * get_free_highpage(int gfp_mask)
+{
+       return __get_pages(gfp_mask, 0);
 }
 
 /*
@@ -333,33 +351,37 @@ void show_free_areas(void)
        unsigned long order, flags;
        unsigned long total = 0;
 
-       printk("Free pages:      %6dkB (%6dkB BigMem)\n ( ",
+       printk("Free pages:      %6dkB (%6ldkB HighMem)\n ( ",
                nr_free_pages<<(PAGE_SHIFT-10),
-               nr_free_bigpages<<(PAGE_SHIFT-10));
+               nr_free_highpages<<(PAGE_SHIFT-10));
        printk("Free: %d, lru_cache: %d (%d %d %d)\n",
                nr_free_pages,
                nr_lru_pages,
                freepages.min,
                freepages.low,
                freepages.high);
+
        spin_lock_irqsave(&page_alloc_lock, flags);
-       for (order=0 ; order < NR_MEM_LISTS; order++) {
-               struct page * tmp;
+       for (order = 0; order < NR_MEM_LISTS; order++) {
                unsigned long nr = 0;
-               for (tmp = free_area[order].next ; tmp != memory_head(free_area+order) ; tmp = tmp->next) {
-                       nr ++;
-               }
-#ifdef CONFIG_BIGMEM
-               for (tmp = free_area[BIGMEM_LISTS_OFFSET+order].next;
-                    tmp != memory_head(free_area+BIGMEM_LISTS_OFFSET+order);
-                    tmp = tmp->next) {
-                       nr ++;
+               struct list_head *head, *curr;
+               struct page *page;
+
+               head = &free_area[order].free_list;
+               for (curr = memlist_next(head); curr != head; curr = memlist_next(curr)) {
+                       page = memlist_entry(curr, struct page, list);
+                       nr++;
                }
+#ifdef CONFIG_HIGHMEM
+               head = &free_area[order+HIGHMEM_LISTS_OFFSET].free_list;
+               for (curr = memlist_next(head); curr != head; curr = memlist_next(curr))
+                       nr++;
 #endif
                total += nr * ((PAGE_SIZE>>10) << order);
                printk("%lu*%lukB ", nr, (unsigned long)((PAGE_SIZE>>10) << order));
        }
        spin_unlock_irqrestore(&page_alloc_lock, flags);
+
        printk("= %lukB)\n", total);
 #ifdef SWAP_CACHE_INFO
        show_swap_cache_info();
@@ -374,11 +396,13 @@ void show_free_areas(void)
  *   - mark all memory queues empty
  *   - clear the memory bitmaps
  */
-unsigned long __init free_area_init(unsigned long start_mem, unsigned long end_mem)
+volatile int data;
+void __init free_area_init(unsigned long end_mem_pages)
 {
        mem_map_t * p;
-       unsigned long mask = PAGE_MASK;
+       unsigned long mask = -1;
        unsigned long i;
+       unsigned long map_size;
 
        /*
         * Select nr of pages we try to keep free for important stuff
@@ -387,7 +411,7 @@ unsigned long __init free_area_init(unsigned long start_mem, unsigned long end_m
         * This is fairly arbitrary, but based on some behaviour
         * analysis.
         */
-       i = (end_mem - PAGE_OFFSET) >> (PAGE_SHIFT+7);
+       i = end_mem_pages >> 7;
        if (i < 10)
                i = 10;
        if (i > 256)
@@ -395,36 +419,48 @@ unsigned long __init free_area_init(unsigned long start_mem, unsigned long end_m
        freepages.min = i;
        freepages.low = i * 2;
        freepages.high = i * 3;
-       mem_map = (mem_map_t *) LONG_ALIGN(start_mem);
-       p = mem_map + MAP_NR(end_mem);
-       start_mem = LONG_ALIGN((unsigned long) p);
-       memset(mem_map, 0, start_mem - (unsigned long) mem_map);
-       do {
-               --p;
+
+       /*
+        * Most architectures just pick 'start_mem'. Some architectures
+        * (with lots of mem and discontiguous memory maps) have to search
+        * for a good area.
+        */
+       map_size = end_mem_pages*sizeof(struct page);
+       mem_map = (struct page *) alloc_bootmem(map_size);
+       memset(mem_map, 0, map_size);
+
+       /*
+        * Initially all pages are reserved - free ones are freed
+        * up by free_all_bootmem() once the early boot process is
+        * done.
+        */
+       for (p = mem_map; p < mem_map + end_mem_pages; p++) {
                set_page_count(p, 0);
-               p->flags = (1 << PG_DMA) | (1 << PG_reserved);
+               p->flags = (1 << PG_DMA);
+               SetPageReserved(p);
                init_waitqueue_head(&p->wait);
-       } while (p > mem_map);
-
+               memlist_init(&p->list);
+       }
+       
        for (i = 0 ; i < NR_MEM_LISTS ; i++) {
                unsigned long bitmap_size;
-               init_mem_queue(free_area+i);
-#ifdef CONFIG_BIGMEM
-               init_mem_queue(free_area+BIGMEM_LISTS_OFFSET+i);
+               unsigned int * map;
+               memlist_init(&(free_area+i)->free_list);
+#ifdef CONFIG_HIGHMEM
+               memlist_init(&(free_area+HIGHMEM_LISTS_OFFSET+i)->free_list);
 #endif
                mask += mask;
-               end_mem = (end_mem + ~mask) & mask;
-               bitmap_size = (end_mem - PAGE_OFFSET) >> (PAGE_SHIFT + i);
+               end_mem_pages = (end_mem_pages + ~mask) & mask;
+               bitmap_size = end_mem_pages >> i;
                bitmap_size = (bitmap_size + 7) >> 3;
                bitmap_size = LONG_ALIGN(bitmap_size);
-               free_area[i].map = (unsigned int *) start_mem;
-               memset((void *) start_mem, 0, bitmap_size);
-               start_mem += bitmap_size;
-#ifdef CONFIG_BIGMEM
-               free_area[BIGMEM_LISTS_OFFSET+i].map = (unsigned int *) start_mem;
-               memset((void *) start_mem, 0, bitmap_size);
-               start_mem += bitmap_size;
+               map = (unsigned int *) alloc_bootmem(bitmap_size);
+               free_area[i].map = map;
+               memset((void *) map, 0, bitmap_size);
+#ifdef CONFIG_HIGHMEM
+               map = (unsigned int *) alloc_bootmem(bitmap_size);
+               free_area[HIGHMEM_LISTS_OFFSET+i].map = map;
+               memset((void *) map, 0, bitmap_size);
 #endif
        }
-       return start_mem;
 }
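
In free_pages_ok() the buddy of the block that starts at map_nr is found, for the current mask = ~0UL << order, as map_nr ^ -mask: -mask equals 1 << order, so the XOR flips exactly the bit that distinguishes the two halves of the next-larger block, and test_and_change_bit() on the shared map bit tells whether that buddy is still free and can be merged. A small worked example of the index arithmetic; the page numbers are arbitrary:

    /* Buddy index arithmetic from free_pages_ok(). */
    #include <stdio.h>

    int main(void)
    {
            unsigned long order, map_nr = 20;   /* block being freed starts at page 20 */

            for (order = 2; order < 5; order++) {
                    unsigned long mask  = (~0UL) << order;
                    unsigned long base  = map_nr & mask;    /* block start at this order */
                    unsigned long buddy = base ^ -mask;     /* -mask == 1UL << order      */

                    printf("order %lu: block %lu, buddy %lu\n", order, base, buddy);
            }
            return 0;
    }

For a block at page 20, this prints buddy 16 at order 2, buddy 24 at order 3, and buddy 0 at order 4, exactly the neighbours that merging would coalesce with.
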
index 97516e77dcfae918c9f79dd6f86fe56de2a4b034..3ce1a186c4e1edeac68f634733042b8752bc4899 100644 (file)
@@ -33,7 +33,7 @@
  * that shared pages stay shared while being swapped.
  */
 
-static int rw_swap_page_base(int rw, unsigned long entry, struct page *page, int wait)
+static int rw_swap_page_base(int rw, pte_t entry, struct page *page, int wait)
 {
        unsigned long type, offset;
        struct swap_info_struct * p;
@@ -42,13 +42,6 @@ static int rw_swap_page_base(int rw, unsigned long entry, struct page *page, int
        kdev_t dev = 0;
        int block_size;
 
-#ifdef DEBUG_SWAP
-       printk ("DebugVM: %s_swap_page entry %08lx, page %p (count %d), %s\n",
-               (rw == READ) ? "read" : "write", 
-               entry, (char *) page_address(page), page_count(page),
-               wait ? "wait" : "nowait");
-#endif
-
        type = SWP_TYPE(entry);
        if (type >= nr_swapfiles) {
                printk("Internal error: bad swap-device\n");
@@ -66,9 +59,7 @@ static int rw_swap_page_base(int rw, unsigned long entry, struct page *page, int
                return 0;
        }
        if (p->swap_map && !p->swap_map[offset]) {
-               printk(KERN_ERR "rw_swap_page: "
-                       "Trying to %s unallocated swap (%08lx)\n", 
-                       (rw == READ) ? "read" : "write", entry);
+               pte_ERROR(entry);
                return 0;
        }
        if (!(p->flags & SWP_USED)) {
@@ -127,12 +118,6 @@ static int rw_swap_page_base(int rw, unsigned long entry, struct page *page, int
        if (page_count(page) == 0)
                printk(KERN_ERR "rw_swap_page: page unused while waiting!\n");
 
-#ifdef DEBUG_SWAP
-       printk ("DebugVM: %s_swap_page finished on page %p (count %d)\n",
-               (rw == READ) ? "read" : "write", 
-               (char *) page_address(page), 
-               page_count(page));
-#endif
        return 1;
 }
 
@@ -145,7 +130,7 @@ static int rw_swap_page_base(int rw, unsigned long entry, struct page *page, int
  */
 void rw_swap_page(int rw, struct page *page, int wait)
 {
-       unsigned long entry = page->offset;
+       pte_t entry = get_pagecache_pte(page);
 
        if (!PageLocked(page))
                PAGE_BUG(page);
@@ -162,7 +147,7 @@ void rw_swap_page(int rw, struct page *page, int wait)
  * Therefore we can't use it.  Later when we can remove the need for the
  * lock map and we can reduce the number of functions exported.
  */
-void rw_swap_page_nolock(int rw, unsigned long entry, char *buf, int wait)
+void rw_swap_page_nolock(int rw, pte_t entry, char *buf, int wait)
 {
        struct page *page = mem_map + MAP_NR(buf);
        
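
Throughout this commit, ad hoc printk() reports of corrupt swap entries and page-table entries are replaced by the pte_ERROR()/pmd_ERROR()/pgd_ERROR() macros, and states that should be impossible become BUG(). A minimal sketch of the resulting idiom (check_one_pmd() is a hypothetical helper, not a function from this patch; the macros are the ones used in the hunks):

static void check_one_pmd(pmd_t *dir)
{
        if (pmd_none(*dir))
                return;                 /* nothing mapped here, not an error */
        if (pmd_bad(*dir)) {
                pmd_ERROR(*dir);        /* one-line report of the bad value */
                pmd_clear(dir);         /* then neutralise the entry */
        }
}
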
index 0776b3ad02688490f789e81ee2bb57fb748e4c7f..23f88618f089053257567a1d3dfe79f538c4fed2 100644 (file)
--- a/mm/slab.c
+++ b/mm/slab.c
@@ -317,10 +317,10 @@ static int slab_break_gfp_order = SLAB_BREAK_GFP_ORDER_LO;
  * slab an obj belongs to.  With kmalloc(), and kfree(), these are used
  * to find the cache which an obj belongs to.
  */
-#define        SLAB_SET_PAGE_CACHE(pg, x)      ((pg)->next = (struct page *)(x))
-#define        SLAB_GET_PAGE_CACHE(pg)         ((kmem_cache_t *)(pg)->next)
-#define        SLAB_SET_PAGE_SLAB(pg, x)       ((pg)->prev = (struct page *)(x))
-#define        SLAB_GET_PAGE_SLAB(pg)          ((kmem_slab_t *)(pg)->prev)
+#define        SLAB_SET_PAGE_CACHE(pg,x)  ((pg)->list.next = (struct list_head *)(x))
+#define        SLAB_GET_PAGE_CACHE(pg)    ((kmem_cache_t *)(pg)->list.next)
+#define        SLAB_SET_PAGE_SLAB(pg,x)   ((pg)->list.prev = (struct list_head *)(x))
+#define        SLAB_GET_PAGE_SLAB(pg)     ((kmem_slab_t *)(pg)->list.prev)
 
 /* Size description struct for general caches. */
 typedef struct cache_sizes {
@@ -402,7 +402,7 @@ static kmem_cache_t *cache_slabp = NULL;
 static unsigned long bufctl_limit = 0;
 
 /* Initialisation - setup the `cache' cache. */
-long __init kmem_cache_init(long start, long end)
+void __init kmem_cache_init(void)
 {
        size_t size, i;
 
@@ -450,7 +450,6 @@ long __init kmem_cache_init(long start, long end)
         */
        if (num_physpages > (32 << 20) >> PAGE_SHIFT)
                slab_break_gfp_order = SLAB_BREAK_GFP_ORDER_HI;
-       return start;
 }
 
 /* Initialisation - setup remaining internal and general caches.
index 3b3a65a715edbcbaa914a5e402a45f5c7cf4b405..0a78127f2c9817da3340dde99402704d644b1a38 100644 (file)
--- a/mm/swap_state.c
+++ b/mm/swap_state.c
@@ -67,25 +67,16 @@ void show_swap_cache_info(void)
 }
 #endif
 
-void add_to_swap_cache(struct page *page, unsigned long entry)
+void add_to_swap_cache(struct page *page, pte_t entry)
 {
 #ifdef SWAP_CACHE_INFO
        swap_cache_add_total++;
 #endif
-#ifdef DEBUG_SWAP
-       printk("DebugVM: add_to_swap_cache(%08lx count %d, entry %08lx)\n",
-                  page_address(page), page_count(page), entry);
-#endif
-       if (PageTestandSetSwapCache(page)) {
-               printk(KERN_ERR "swap_cache: replacing non-empty entry %08lx "
-                          "on page %08lx\n",
-                          page->offset, page_address(page));
-       }
-       if (page->inode) {
-               printk(KERN_ERR "swap_cache: replacing page-cached entry "
-                          "on page %08lx\n", page_address(page));
-       }
-       add_to_page_cache(page, &swapper_inode, entry);
+       if (PageTestandSetSwapCache(page))
+               BUG();
+       if (page->inode)
+               BUG();
+       add_to_page_cache(page, &swapper_inode, pte_val(entry));
 }
 
 /*
@@ -94,13 +85,13 @@ void add_to_swap_cache(struct page *page, unsigned long entry)
  * Note: if swap_map[] reaches SWAP_MAP_MAX the entries are treated as
  * "permanent", but will be reclaimed by the next swapoff.
  */
-int swap_duplicate(unsigned long entry)
+int swap_duplicate(pte_t entry)
 {
        struct swap_info_struct * p;
        unsigned long offset, type;
        int result = 0;
 
-       if (!entry)
+       if (!pte_val(entry))
                goto out;
        type = SWP_TYPE(entry);
        if (type & SHM_SWP_TYPE)
@@ -121,41 +112,32 @@ int swap_duplicate(unsigned long entry)
        else {
                static int overflow = 0;
                if (overflow++ < 5)
-                       printk(KERN_WARNING
-                               "swap_duplicate: entry %08lx map count=%d\n",
-                               entry, p->swap_map[offset]);
+                       pte_ERROR(entry);
                p->swap_map[offset] = SWAP_MAP_MAX;
        }
        result = 1;
-#ifdef DEBUG_SWAP
-       printk("DebugVM: swap_duplicate(entry %08lx, count now %d)\n",
-                  entry, p->swap_map[offset]);
-#endif
 out:
        return result;
 
 bad_file:
-       printk(KERN_ERR
-               "swap_duplicate: entry %08lx, nonexistent swap file\n", entry);
+       pte_ERROR(entry);
        goto out;
 bad_offset:
-       printk(KERN_ERR
-               "swap_duplicate: entry %08lx, offset exceeds max\n", entry);
+       pte_ERROR(entry);
        goto out;
 bad_unused:
-       printk(KERN_ERR
-               "swap_duplicate at %8p: entry %08lx, unused page\n", 
-                  __builtin_return_address(0), entry);
+       pte_ERROR(entry);
        goto out;
 }
 
-int swap_count(unsigned long entry)
+int swap_count(struct page *page)
 {
        struct swap_info_struct * p;
        unsigned long offset, type;
+       pte_t entry = get_pagecache_pte(page);
        int retval = 0;
 
-       if (!entry)
+       if (!pte_val(entry))
                goto bad_entry;
        type = SWP_TYPE(entry);
        if (type & SHM_SWP_TYPE)
@@ -169,10 +151,6 @@ int swap_count(unsigned long entry)
        if (!p->swap_map[offset])
                goto bad_unused;
        retval = p->swap_map[offset];
-#ifdef DEBUG_SWAP
-       printk("DebugVM: swap_count(entry %08lx, count %d)\n",
-                  entry, retval);
-#endif
 out:
        return retval;
 
@@ -180,17 +158,13 @@ bad_entry:
        printk(KERN_ERR "swap_count: null entry!\n");
        goto out;
 bad_file:
-       printk(KERN_ERR
-                  "swap_count: entry %08lx, nonexistent swap file!\n", entry);
+       pte_ERROR(entry);
        goto out;
 bad_offset:
-       printk(KERN_ERR
-                  "swap_count: entry %08lx, offset exceeds max!\n", entry);
+       pte_ERROR(entry);
        goto out;
 bad_unused:
-       printk(KERN_ERR
-                  "swap_count at %8p: entry %08lx, unused page!\n", 
-                  __builtin_return_address(0), entry);
+       pte_ERROR(entry);
        goto out;
 }
 
@@ -198,22 +172,13 @@ static inline void remove_from_swap_cache(struct page *page)
 {
        struct inode *inode = page->inode;
 
-       if (!inode) {
-               printk ("VM: Removing swap cache page with zero inode hash "
-                       "on page %08lx\n", page_address(page));
-               return;
-       }
-       if (inode != &swapper_inode) {
-               printk ("VM: Removing swap cache page with wrong inode hash "
-                       "on page %08lx\n", page_address(page));
-       }
+       if (!inode)
+               BUG();
+       if (inode != &swapper_inode)
+               BUG();
        if (!PageSwapCache(page))
                PAGE_BUG(page);
 
-#ifdef DEBUG_SWAP
-       printk("DebugVM: remove_from_swap_cache(%08lx count %d)\n",
-                  page_address(page), page_count(page));
-#endif
        PageClearSwapCache(page);
        remove_inode_page(page);
 }
@@ -224,19 +189,14 @@ static inline void remove_from_swap_cache(struct page *page)
  */
 void __delete_from_swap_cache(struct page *page)
 {
-       long entry = page->offset;
+       pte_t entry = get_pagecache_pte(page);
 
 #ifdef SWAP_CACHE_INFO
        swap_cache_del_total++;
 #endif
-#ifdef DEBUG_SWAP
-       printk("DebugVM: delete_from_swap_cache(%08lx count %d, "
-                  "entry %08lx)\n",
-                  page_address(page), page_count(page), entry);
-#endif
-       remove_from_swap_cache (page);
+       remove_from_swap_cache(page);
        lock_kernel();
-       swap_free (entry);
+       swap_free(entry);
        unlock_kernel();
 }
 
@@ -268,10 +228,8 @@ void delete_from_swap_cache(struct page *page)
  * this page if it is the last user of the page. 
  */
 
-void free_page_and_swap_cache(unsigned long addr)
+void free_page_and_swap_cache(struct page *page)
 {
-       struct page *page = mem_map + MAP_NR(addr);
-
        /* 
         * If we are the only user, then free up the swap cache. 
         */
@@ -295,7 +253,7 @@ void free_page_and_swap_cache(unsigned long addr)
  * lock before returning.
  */
 
-struct page * lookup_swap_cache(unsigned long entry)
+struct page * lookup_swap_cache(pte_t entry)
 {
        struct page *found;
 
@@ -303,7 +261,10 @@ struct page * lookup_swap_cache(unsigned long entry)
        swap_cache_find_total++;
 #endif
        while (1) {
-               found = find_lock_page(&swapper_inode, entry);
+               /*
+                * Right now the pagecache is 32-bit only.
+                */
+               found = find_lock_page(&swapper_inode, pte_val(entry));
                if (!found)
                        return 0;
                if (found->inode != &swapper_inode || !PageSwapCache(found))
@@ -331,15 +292,11 @@ out_bad:
  * the swap entry is no longer in use.
  */
 
-struct page * read_swap_cache_async(unsigned long entry, int wait)
+struct page * read_swap_cache_async(pte_t entry, int wait)
 {
        struct page *found_page = 0, *new_page;
        unsigned long new_page_addr;
        
-#ifdef DEBUG_SWAP
-       printk("DebugVM: read_swap_cache_async entry %08lx%s\n",
-                  entry, wait ? ", wait" : "");
-#endif
        /*
         * Make sure the swap entry is still in use.
         */
@@ -368,11 +325,6 @@ struct page * read_swap_cache_async(unsigned long entry, int wait)
         */
        add_to_swap_cache(new_page, entry);
        rw_swap_page(READ, new_page, wait);
-#ifdef DEBUG_SWAP
-       printk("DebugVM: read_swap_cache_async created "
-                  "entry %08lx at %p\n",
-                       entry, (char *) page_address(new_page));
-#endif
        return new_page;
 
 out_free_page:
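
The swap-cache hunks above change the swap handle from a bare unsigned long to a pte_t: it is tested with pte_val(), built with __pte(), and only flattened back to an integer where the 32-bit page-cache index is needed. A minimal sketch of a caller passing that handle around (example_swap_out_page() is a hypothetical helper; get_swap_page(), add_to_swap_cache() and rw_swap_page() are the interfaces as changed by this patch):

static int example_swap_out_page(struct page *page)
{
        /* page is assumed locked and not yet in the swap cache, with
         * whatever locking get_swap_page() expects held by the caller */
        pte_t entry = get_swap_page();          /* pte_t now, not unsigned long */

        if (!pte_val(entry))                    /* zero pte: no swap space left */
                return -ENOMEM;
        add_to_swap_cache(page, entry);         /* keyed internally by pte_val(entry) */
        rw_swap_page(WRITE, page, 0);           /* start the write, do not wait */
        return 0;
}
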
index c4ce5377d5ebf79b7e4daf543c5211c871511123..76aea7b7e2b13ee69cfa54c809fd805f50eb12e6 100644 (file)
--- a/mm/swapfile.c
+++ b/mm/swapfile.c
@@ -81,17 +81,18 @@ static inline int scan_swap_map(struct swap_info_struct *si)
        return 0;
 }
 
-unsigned long get_swap_page(void)
+pte_t get_swap_page(void)
 {
        struct swap_info_struct * p;
-       unsigned long offset, entry;
+       unsigned long offset;
+       pte_t entry = __pte(0);
        int type, wrapped = 0;
 
        type = swap_list.next;
        if (type < 0)
-               return 0;
+               goto out;
        if (nr_swap_pages == 0)
-               return 0;
+               goto out;
 
        while (1) {
                p = &swap_info[type];
@@ -106,7 +107,7 @@ unsigned long get_swap_page(void)
                                } else {
                                        swap_list.next = type;
                                }
-                               return entry;
+                               goto out;
                        }
                }
                type = p->next;
@@ -115,19 +116,21 @@ unsigned long get_swap_page(void)
                                type = swap_list.head;
                                wrapped = 1;
                        }
-               } else if (type < 0) {
-                       return 0;       /* out of swap space */
-               }
+               } else
+                       if (type < 0)
+                               goto out;       /* out of swap space */
        }
+out:
+       return entry;
 }
 
 
-void swap_free(unsigned long entry)
+void swap_free(pte_t entry)
 {
        struct swap_info_struct * p;
        unsigned long offset, type;
 
-       if (!entry)
+       if (!pte_val(entry))
                goto out;
 
        type = SWP_TYPE(entry);
@@ -154,10 +157,6 @@ void swap_free(unsigned long entry)
                        nr_swap_pages++;
                }
        }
-#ifdef DEBUG_SWAP
-       printk("DebugVM: swap_free(entry %08lx, count now %d)\n",
-              entry, p->swap_map[offset]);
-#endif
 out:
        return;
 
@@ -171,24 +170,24 @@ bad_offset:
        printk("swap_free: offset exceeds max\n");
        goto out;
 bad_free:
-       printk("swap_free: swap-space map bad (entry %08lx)\n",entry);
+       pte_ERROR(entry);
        goto out;
 }
 
 /* needs the big kernel lock */
-unsigned long acquire_swap_entry(struct page *page)
+pte_t acquire_swap_entry(struct page *page)
 {
        struct swap_info_struct * p;
        unsigned long offset, type;
-       unsigned long entry;
+       pte_t entry;
 
        if (!test_bit(PG_swap_entry, &page->flags))
                goto new_swap_entry;
 
        /* We have the old entry in the page offset still */
-       entry = page->offset;
-       if (!entry)
+       if (!page->offset)
                goto new_swap_entry;
+       entry = get_pagecache_pte(page);
        type = SWP_TYPE(entry);
        if (type & SHM_SWP_TYPE)
                goto new_swap_entry;
@@ -223,7 +222,7 @@ new_swap_entry:
  * what to do if a write is requested later.
  */
 static inline void unuse_pte(struct vm_area_struct * vma, unsigned long address,
-       pte_t *dir, unsigned long entry, unsigned long page)
+       pte_t *dir, pte_t entry, struct page* page)
 {
        pte_t pte = *dir;
 
@@ -239,7 +238,7 @@ static inline void unuse_pte(struct vm_area_struct * vma, unsigned long address,
                set_pte(dir, pte_mkdirty(pte));
                return;
        }
-       if (pte_val(pte) != entry)
+       if (pte_val(pte) != pte_val(entry))
                return;
        set_pte(dir, pte_mkdirty(mk_pte(page, vma->vm_page_prot)));
        swap_free(entry);
@@ -249,7 +248,7 @@ static inline void unuse_pte(struct vm_area_struct * vma, unsigned long address,
 
 static inline void unuse_pmd(struct vm_area_struct * vma, pmd_t *dir,
        unsigned long address, unsigned long size, unsigned long offset,
-       unsigned long entry, unsigned long page)
+       pte_t entry, struct page* page)
 {
        pte_t * pte;
        unsigned long end;
@@ -257,7 +256,7 @@ static inline void unuse_pmd(struct vm_area_struct * vma, pmd_t *dir,
        if (pmd_none(*dir))
                return;
        if (pmd_bad(*dir)) {
-               printk("unuse_pmd: bad pmd (%08lx)\n", pmd_val(*dir));
+               pmd_ERROR(*dir);
                pmd_clear(dir);
                return;
        }
@@ -271,12 +270,12 @@ static inline void unuse_pmd(struct vm_area_struct * vma, pmd_t *dir,
                unuse_pte(vma, offset+address-vma->vm_start, pte, entry, page);
                address += PAGE_SIZE;
                pte++;
-       } while (address < end);
+       } while (address && (address < end));
 }
 
 static inline void unuse_pgd(struct vm_area_struct * vma, pgd_t *dir,
        unsigned long address, unsigned long size,
-       unsigned long entry, unsigned long page)
+       pte_t entry, struct page* page)
 {
        pmd_t * pmd;
        unsigned long offset, end;
@@ -284,7 +283,7 @@ static inline void unuse_pgd(struct vm_area_struct * vma, pgd_t *dir,
        if (pgd_none(*dir))
                return;
        if (pgd_bad(*dir)) {
-               printk("unuse_pgd: bad pgd (%08lx)\n", pgd_val(*dir));
+               pgd_ERROR(*dir);
                pgd_clear(dir);
                return;
        }
@@ -294,28 +293,32 @@ static inline void unuse_pgd(struct vm_area_struct * vma, pgd_t *dir,
        end = address + size;
        if (end > PGDIR_SIZE)
                end = PGDIR_SIZE;
+       if (address >= end)
+               BUG();
        do {
                unuse_pmd(vma, pmd, address, end - address, offset, entry,
                          page);
                address = (address + PMD_SIZE) & PMD_MASK;
                pmd++;
-       } while (address < end);
+       } while (address && (address < end));
 }
 
 static void unuse_vma(struct vm_area_struct * vma, pgd_t *pgdir,
-                       unsigned long entry, unsigned long page)
+                       pte_t entry, struct page* page)
 {
        unsigned long start = vma->vm_start, end = vma->vm_end;
 
-       while (start < end) {
+       if (start >= end)
+               BUG();
+       do {
                unuse_pgd(vma, pgdir, start, end - start, entry, page);
                start = (start + PGDIR_SIZE) & PGDIR_MASK;
                pgdir++;
-       }
+       } while (start && (start < end));
 }
 
-static void unuse_process(struct mm_struct * mm, unsigned long entry, 
-                       unsigned long page)
+static void unuse_process(struct mm_struct * mm,
+                       pte_t entry, struct page* page)
 {
        struct vm_area_struct* vma;
 
@@ -340,8 +343,8 @@ static int try_to_unuse(unsigned int type)
 {
        struct swap_info_struct * si = &swap_info[type];
        struct task_struct *p;
-       struct page *page_map;
-       unsigned long entry, page;
+       struct page *page;
+       pte_t entry;
        int i;
 
        while (1) {
@@ -361,8 +364,8 @@ static int try_to_unuse(unsigned int type)
                /* Get a page for the entry, using the existing swap
                    cache page if there is one.  Otherwise, get a clean
                    page and read the swap into it. */
-               page_map = read_swap_cache(entry);
-               if (!page_map) {
+               page = read_swap_cache(entry);
+               if (!page) {
                        /*
                         * Continue searching if the entry became unused.
                         */
@@ -370,7 +373,6 @@ static int try_to_unuse(unsigned int type)
                                continue;
                        return -ENOMEM;
                }
-               page = page_address(page_map);
                read_lock(&tasklist_lock);
                for_each_task(p)
                        unuse_process(p->mm, entry, page);
@@ -378,17 +380,15 @@ static int try_to_unuse(unsigned int type)
                shm_unuse(entry, page);
                /* Now get rid of the extra reference to the temporary
                    page we've been using. */
-               if (PageSwapCache(page_map))
-                       delete_from_swap_cache(page_map);
-               __free_page(page_map);
+               if (PageSwapCache(page))
+                       delete_from_swap_cache(page);
+               __free_page(page);
                /*
                 * Check for and clear any overflowed swap map counts.
                 */
                if (si->swap_map[i] != 0) {
                        if (si->swap_map[i] != SWAP_MAP_MAX)
-                               printk(KERN_ERR
-                                       "try_to_unuse: entry %08lx count=%d\n",
-                                       entry, si->swap_map[i]);
+                               pte_ERROR(entry);
                        si->swap_map[i] = 0;
                        nr_swap_pages++;
                }
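
unuse_pgd() and unuse_vma() above, like the matching loops in vmalloc.c and vmscan.c below, switch from "while (address < end)" to a do/while that also tests "address": stepping by PGDIR_SIZE and masking can wrap the address to 0 at the very top of the address space, which would otherwise loop forever, and empty ranges are now treated as caller bugs. A self-contained sketch of the idiom (example_count_pgds() is a made-up name):

static int example_count_pgds(unsigned long address, unsigned long end)
{
        int n = 0;

        if (address >= end)
                BUG();                  /* an empty range is a caller bug now */
        do {
                n++;                    /* per-pgd work would go here */
                address = (address + PGDIR_SIZE) & PGDIR_MASK;  /* can wrap to 0 */
        } while (address && (address < end));   /* the extra test stops the wrap */
        return n;
}
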
index 889fabe21c7ac90cc0797ee6c641b23931d37b0a..ebe589d856d0c3aedb49ae5cc1a8957c9f4e852a 100644 (file)
--- a/mm/vmalloc.c
+++ b/mm/vmalloc.c
@@ -20,7 +20,7 @@ static inline void free_area_pte(pmd_t * pmd, unsigned long address, unsigned lo
        if (pmd_none(*pmd))
                return;
        if (pmd_bad(*pmd)) {
-               printk("free_area_pte: bad pmd (%08lx)\n", pmd_val(*pmd));
+               pmd_ERROR(*pmd);
                pmd_clear(pmd);
                return;
        }
@@ -29,7 +29,7 @@ static inline void free_area_pte(pmd_t * pmd, unsigned long address, unsigned lo
        end = address + size;
        if (end > PMD_SIZE)
                end = PMD_SIZE;
-       while (address < end) {
+       do {
                pte_t page = *pte;
                pte_clear(pte);
                address += PAGE_SIZE;
@@ -37,11 +37,11 @@ static inline void free_area_pte(pmd_t * pmd, unsigned long address, unsigned lo
                if (pte_none(page))
                        continue;
                if (pte_present(page)) {
-                       free_page(pte_page(page));
+                       __free_page(mem_map+pte_pagenr(page));
                        continue;
                }
                printk("Whee.. Swapped out page in kernel page table\n");
-       }
+       } while (address < end);
 }
 
 static inline void free_area_pmd(pgd_t * dir, unsigned long address, unsigned long size)
@@ -52,7 +52,7 @@ static inline void free_area_pmd(pgd_t * dir, unsigned long address, unsigned lo
        if (pgd_none(*dir))
                return;
        if (pgd_bad(*dir)) {
-               printk("free_area_pmd: bad pgd (%08lx)\n", pgd_val(*dir));
+               pgd_ERROR(*dir);
                pgd_clear(dir);
                return;
        }
@@ -61,11 +61,11 @@ static inline void free_area_pmd(pgd_t * dir, unsigned long address, unsigned lo
        end = address + size;
        if (end > PGDIR_SIZE)
                end = PGDIR_SIZE;
-       while (address < end) {
+       do {
                free_area_pte(pmd, address, end - address);
                address = (address + PMD_SIZE) & PMD_MASK;
                pmd++;
-       }
+       } while (address < end);
 }
 
 void vmfree_area_pages(unsigned long address, unsigned long size)
@@ -75,11 +75,11 @@ void vmfree_area_pages(unsigned long address, unsigned long size)
 
        dir = pgd_offset_k(address);
        flush_cache_all();
-       while (address < end) {
+       do {
                free_area_pmd(dir, address, end - address);
                address = (address + PGDIR_SIZE) & PGDIR_MASK;
                dir++;
-       }
+       } while (address && (address < end));
        flush_tlb_all();
 }
 
@@ -91,17 +91,17 @@ static inline int alloc_area_pte(pte_t * pte, unsigned long address, unsigned lo
        end = address + size;
        if (end > PMD_SIZE)
                end = PMD_SIZE;
-       while (address < end) {
-               unsigned long page;
+       do {
+               struct page * page;
                if (!pte_none(*pte))
                        printk("alloc_area_pte: page already exists\n");
-               page = __get_free_page(GFP_KERNEL|GFP_BIGMEM);
+               page = get_free_highpage(GFP_KERNEL|__GFP_HIGHMEM);
                if (!page)
                        return -ENOMEM;
                set_pte(pte, mk_pte(page, PAGE_KERNEL));
                address += PAGE_SIZE;
                pte++;
-       }
+       } while (address < end);
        return 0;
 }
 
@@ -113,7 +113,7 @@ static inline int alloc_area_pmd(pmd_t * pmd, unsigned long address, unsigned lo
        end = address + size;
        if (end > PGDIR_SIZE)
                end = PGDIR_SIZE;
-       while (address < end) {
+       do {
                pte_t * pte = pte_alloc_kernel(pmd, address);
                if (!pte)
                        return -ENOMEM;
@@ -121,7 +121,7 @@ static inline int alloc_area_pmd(pmd_t * pmd, unsigned long address, unsigned lo
                        return -ENOMEM;
                address = (address + PMD_SIZE) & PMD_MASK;
                pmd++;
-       }
+       } while (address < end);
        return 0;
 }
 
@@ -132,7 +132,7 @@ int vmalloc_area_pages(unsigned long address, unsigned long size)
 
        dir = pgd_offset_k(address);
        flush_cache_all();
-       while (address < end) {
+       do {
                pmd_t *pmd;
                pgd_t olddir = *dir;
                
@@ -145,7 +145,7 @@ int vmalloc_area_pages(unsigned long address, unsigned long size)
                        set_pgdir(address, *dir);
                address = (address + PGDIR_SIZE) & PGDIR_MASK;
                dir++;
-       }
+       } while (address && (address < end));
        flush_tlb_all();
        return 0;
 }
@@ -202,14 +202,19 @@ void * vmalloc(unsigned long size)
        struct vm_struct *area;
 
        size = PAGE_ALIGN(size);
-       if (!size || size > (max_mapnr << PAGE_SHIFT))
+       if (!size || size > (max_mapnr << PAGE_SHIFT)) {
+               BUG();
                return NULL;
+       }
        area = get_vm_area(size);
-       if (!area)
+       if (!area) {
+               BUG();
                return NULL;
+       }
        addr = area->addr;
        if (vmalloc_area_pages(VMALLOC_VMADDR(addr), size)) {
                vfree(addr);
+               BUG();
                return NULL;
        }
        return addr;
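
The alloc_area_pte() hunk above switches vmalloc()'s backing pages from __get_free_page(GFP_KERNEL|GFP_BIGMEM), which yields a kernel virtual address, to get_free_highpage(GFP_KERNEL|__GFP_HIGHMEM), which yields a struct page * that may live in highmem and is only reachable through the page tables being built here. A minimal sketch of that single allocate-and-map step (map_one_page() is a hypothetical wrapper; the calls are the ones in the hunk above):

static int map_one_page(pte_t *pte)
{
        struct page *page;

        if (!pte_none(*pte))
                printk("map_one_page: page already exists\n");
        /* may return a highmem page with no permanent kernel mapping */
        page = get_free_highpage(GFP_KERNEL | __GFP_HIGHMEM);
        if (!page)
                return -ENOMEM;
        set_pte(pte, mk_pte(page, PAGE_KERNEL));        /* mk_pte() now takes a struct page * */
        return 0;
}
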
index 31b00047a2a39bf713172aaba6b6fe0b11dad1bf..83d987a9f0e57c964dc887f511ba6b42e9a14c0f 100644 (file)
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -17,7 +17,7 @@
 #include <linux/smp_lock.h>
 #include <linux/pagemap.h>
 #include <linux/init.h>
-#include <linux/bigmem.h>
+#include <linux/highmem.h>
 
 #include <asm/pgtable.h>
 
  */
 static int try_to_swap_out(struct vm_area_struct* vma, unsigned long address, pte_t * page_table, int gfp_mask)
 {
-       pte_t pte;
-       unsigned long entry;
-       unsigned long page_addr;
+       pte_t pte, entry;
        struct page * page;
 
        pte = *page_table;
        if (!pte_present(pte))
                goto out_failed;
-       page_addr = pte_page(pte);
-       if (MAP_NR(page_addr) >= max_mapnr)
+       page = pte_page(pte);
+       if (page-mem_map >= max_mapnr)
                goto out_failed;
 
-       page = mem_map + MAP_NR(page_addr);
-
        /* Don't look at this pte if it's been accessed recently. */
        if (pte_young(pte)) {
                /*
@@ -62,7 +58,7 @@ static int try_to_swap_out(struct vm_area_struct* vma, unsigned long address, pt
        if (PageReserved(page)
            || PageLocked(page)
            || ((gfp_mask & __GFP_DMA) && !PageDMA(page))
-           || (!(gfp_mask & __GFP_BIGMEM) && PageBIGMEM(page)))
+           || (!(gfp_mask & __GFP_HIGHMEM) && PageHighMem(page)))
                goto out_failed;
 
        /*
@@ -74,9 +70,9 @@ static int try_to_swap_out(struct vm_area_struct* vma, unsigned long address, pt
         * memory, and we should just continue our scan.
         */
        if (PageSwapCache(page)) {
-               entry = page->offset;
+               entry = get_pagecache_pte(page);
                swap_duplicate(entry);
-               set_pte(page_table, __pte(entry));
+               set_pte(page_table, entry);
 drop_pte:
                vma->vm_mm->rss--;
                flush_tlb_page(vma, address);
@@ -150,14 +146,14 @@ drop_pte:
         * page with that swap entry.
         */
        entry = acquire_swap_entry(page);
-       if (!entry)
+       if (!pte_val(entry))
                goto out_failed; /* No swap space left */
                
-       if (!(page = prepare_bigmem_swapout(page)))
+       if (!(page = prepare_highmem_swapout(page)))
                goto out_swap_free;
 
        vma->vm_mm->rss--;
-       set_pte(page_table, __pte(entry));
+       set_pte(page_table, entry);
        vmlist_access_unlock(vma->vm_mm);
 
        flush_tlb_page(vma, address);
@@ -201,7 +197,7 @@ static inline int swap_out_pmd(struct vm_area_struct * vma, pmd_t *dir, unsigned
        if (pmd_none(*dir))
                return 0;
        if (pmd_bad(*dir)) {
-               printk("swap_out_pmd: bad pmd (%08lx)\n", pmd_val(*dir));
+               pmd_ERROR(*dir);
                pmd_clear(dir);
                return 0;
        }
@@ -220,7 +216,7 @@ static inline int swap_out_pmd(struct vm_area_struct * vma, pmd_t *dir, unsigned
                        return result;
                address += PAGE_SIZE;
                pte++;
-       } while (address < end);
+       } while (address && (address < end));
        return 0;
 }
 
@@ -232,7 +228,7 @@ static inline int swap_out_pgd(struct vm_area_struct * vma, pgd_t *dir, unsigned
        if (pgd_none(*dir))
                return 0;
        if (pgd_bad(*dir)) {
-               printk("swap_out_pgd: bad pgd (%08lx)\n", pgd_val(*dir));
+               pgd_ERROR(*dir);
                pgd_clear(dir);
                return 0;
        }
@@ -240,7 +236,7 @@ static inline int swap_out_pgd(struct vm_area_struct * vma, pgd_t *dir, unsigned
        pmd = pmd_offset(dir, address);
 
        pgd_end = (address + PGDIR_SIZE) & PGDIR_MASK;  
-       if (end > pgd_end)
+       if (pgd_end && (end > pgd_end))
                end = pgd_end;
        
        do {
@@ -249,7 +245,7 @@ static inline int swap_out_pgd(struct vm_area_struct * vma, pgd_t *dir, unsigned
                        return result;
                address = (address + PMD_SIZE) & PMD_MASK;
                pmd++;
-       } while (address < end);
+       } while (address && (address < end));
        return 0;
 }
 
@@ -265,13 +261,15 @@ static int swap_out_vma(struct vm_area_struct * vma, unsigned long address, int
        pgdir = pgd_offset(vma->vm_mm, address);
 
        end = vma->vm_end;
-       while (address < end) {
+       if (address >= end)
+               BUG();
+       do {
                int result = swap_out_pgd(vma, pgdir, address, end, gfp_mask);
                if (result)
                        return result;
                address = (address + PGDIR_SIZE) & PGDIR_MASK;
                pgdir++;
-       }
+       } while (address && (address < end));
        return 0;
 }
 
@@ -498,8 +496,8 @@ int kswapd(void *unused)
                 */
                do {
                        /* kswapd is critical to provide GFP_ATOMIC
-                          allocations (not GFP_BIGMEM ones). */
-                       if (nr_free_pages - nr_free_bigpages >= freepages.high)
+                          allocations (not GFP_HIGHMEM ones). */
+                       if (nr_free_pages - nr_free_highpages >= freepages.high)
                                break;
 
                        if (!do_try_to_free_pages(GFP_KSWAPD))