From 6f053df1bad523f574247f86a8f3eb33bb334b5c Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Fri, 23 Nov 2007 15:27:51 -0500 Subject: [PATCH] Import 2.3.23pre3 --- Documentation/Configure.help | 81 ++-- arch/i386/config.in | 13 +- arch/i386/defconfig | 8 +- arch/i386/kernel/head.S | 160 +++++++- arch/i386/kernel/irq.c | 1 + arch/i386/kernel/setup.c | 238 +++++++----- arch/i386/kernel/smpboot.c | 55 ++- arch/i386/kernel/traps.c | 8 +- arch/i386/kernel/vm86.c | 4 +- arch/i386/mm/Makefile | 4 - arch/i386/mm/bigmem.c | 33 -- arch/i386/mm/fault.c | 36 +- arch/i386/mm/init.c | 606 +++++++++++++++++------------- arch/i386/mm/ioremap.c | 21 +- drivers/block/ide-dma.c | 2 +- drivers/block/ide.c | 1 + drivers/char/console.c | 24 +- drivers/char/n_tty.c | 2 +- drivers/char/serial.c | 7 +- drivers/char/tty_io.c | 18 +- drivers/net/eepro100.c | 2 +- drivers/net/starfire.c | 1 + drivers/net/via-rhine.c | 5 - drivers/usb/printer.c | 177 +++++---- fs/buffer.c | 25 +- fs/dcache.c | 2 +- fs/exec.c | 71 +++- fs/file.c | 4 +- fs/inode.c | 3 +- fs/iobuf.c | 22 +- fs/nfs/dir.c | 7 +- fs/nfs/symlink.c | 7 +- fs/proc/array.c | 30 +- fs/proc/mem.c | 40 +- include/asm-i386/bigmem.h | 69 ---- include/asm-i386/bugs.h | 4 + include/asm-i386/fixmap.h | 6 +- include/asm-i386/highmem.h | 85 +++++ include/asm-i386/io.h | 36 +- include/asm-i386/page.h | 38 +- include/asm-i386/pgtable-2level.h | 62 +++ include/asm-i386/pgtable-3level.h | 131 +++++++ include/asm-i386/pgtable.h | 202 +++++----- include/asm-i386/processor.h | 7 + include/asm-i386/smp.h | 4 +- include/linux/bigmem.h | 48 --- include/linux/binfmts.h | 2 +- include/linux/bootmem.h | 24 ++ include/linux/fs.h | 11 +- include/linux/highmem.h | 77 ++++ include/linux/iobuf.h | 1 - include/linux/kernel.h | 7 +- include/linux/mm.h | 66 ++-- include/linux/pagemap.h | 47 ++- include/linux/sched.h | 2 +- include/linux/shm.h | 4 +- include/linux/slab.h | 2 +- include/linux/swap.h | 27 +- include/linux/tty.h | 7 +- init/main.c | 36 +- ipc/shm.c 
| 164 ++++---- kernel/fork.c | 4 +- kernel/printk.c | 2 +- kernel/ptrace.c | 38 +- mm/Makefile | 8 +- mm/bigmem.c | 71 ---- mm/bootmem.c | 217 +++++++++++ mm/filemap.c | 218 +++++------ mm/highmem.c | 81 ++++ mm/memory.c | 207 +++++----- mm/mmap.c | 2 +- mm/mprotect.c | 14 +- mm/mremap.c | 4 +- mm/page_alloc.c | 430 +++++++++++---------- mm/page_io.c | 23 +- mm/slab.c | 11 +- mm/swap_state.c | 112 ++---- mm/swapfile.c | 86 ++--- mm/vmalloc.c | 43 ++- mm/vmscan.c | 44 ++- 80 files changed, 2667 insertions(+), 1835 deletions(-) delete mode 100644 arch/i386/mm/bigmem.c delete mode 100644 include/asm-i386/bigmem.h create mode 100644 include/asm-i386/highmem.h create mode 100644 include/asm-i386/pgtable-2level.h create mode 100644 include/asm-i386/pgtable-3level.h delete mode 100644 include/linux/bigmem.h create mode 100644 include/linux/bootmem.h create mode 100644 include/linux/highmem.h delete mode 100644 mm/bigmem.c create mode 100644 mm/bootmem.c create mode 100644 mm/highmem.c diff --git a/Documentation/Configure.help b/Documentation/Configure.help index 9dc8623641a7..619bc25aaaec 100644 --- a/Documentation/Configure.help +++ b/Documentation/Configure.help @@ -175,18 +175,25 @@ CONFIG_MATHEMU on the Alpha. The only time you would ever not say Y is to say M in order to debug the code. Say Y unless you know what you are doing. -Support for over 1Gig of memory -CONFIG_BIGMEM - Linux can use up to 1 Gigabytes (= 2^30 bytes) of physical memory. - If you are compiling a kernel which will never run on a machine with - more than 1 Gigabyte, answer N here. Otherwise, say Y. - - The actual amount of physical memory may need to be specified using a - kernel command line option such as "mem=256M". (Try "man bootparam" - or see the documentation of your boot loader (lilo or loadlin) about - how to pass options to the kernel at boot time. The lilo procedure - is also explained in the SCSI-HOWTO, available from - http://metalab.unc.edu/mdw/linux.html#howto .) 
+High Memory support +CONFIG_NOHIGHMEM + If you are compiling a kernel which will never run on a machine + with more than 1 Gigabyte total physical RAM, answer "off" + here (default choice). + + Linux can use up to 64 Gigabytes of physical memory on x86 systems. + High memory is all the physical RAM that could not be directly + mapped by the kernel - i.e. 3GB if there is 4GB RAM in the system, + 7GB if there is 8GB RAM in the system. + + If 4 Gigabytes physical RAM or less is used then answer "4GB" here. + + If more than 4 Gigabytes is used then answer "64GB" here. This + selection turns Intel PAE (Physical Address Extension) mode on. + PAE implements 3-level paging on IA32 processors. PAE is fully + supported by Linux; PAE mode is implemented on all recent Intel + processors (PPro and better). NOTE: The "64GB" kernel will not + boot on CPUs that do not support PAE! Normal PC floppy disk support CONFIG_BLK_DEV_FD @@ -12180,18 +12187,44 @@ Include support for the NetWinder CONFIG_ARCH_NETWINDER Say Y here if you intend to run this kernel on the NetWinder. -Maximum Physical Memory +Virtual/Physical Memory Split CONFIG_1GB - Linux can use up to 2 Gigabytes (= 2^31 bytes) of physical memory. - If you are compiling a kernel which will never run on a machine with - more than 1 Gigabyte, answer "1GB" here. Otherwise, say "2GB". - - The actual amount of physical memory should be specified using a - kernel command line option such as "mem=256M". (Try "man bootparam" - or see the documentation of your boot loader (lilo or loadlin) about - how to pass options to the kernel at boot time. The lilo procedure - is also explained in the SCSI-HOWTO, available from - http://metalab.unc.edu/mdw/linux.html#howto .) + If you are compiling a kernel which will never run on a machine + with more than 1 Gigabyte total physical RAM, answer "3GB/1GB" + here (default choice). + + On 32-bit x86 systems Linux can use up to 64 Gigabytes of physical + memory.
However 32-bit x86 processors have only 4 Gigabytes of + virtual memory space. This option specifies the maximum amount of + virtual memory space one process can potentially use. Certain types + of applications (eg. database servers) perform better if they have + as much virtual memory per process as possible. + + The remaining part of the 4G virtual memory space is used by the + kernel to 'permanently map' as much physical memory as possible. + Certain types of applications perform better if there is more + 'permanently mapped' kernel memory. + + [WARNING! Certain boards do not support PCI DMA to physical addresses + bigger than 2 Gigabytes. Non-DMA-able memory must not be permanently + mapped by the kernel, thus a 1G/3G split will not work on such boxes.] + + As you can see there is no 'perfect split' - the fundamental + problem is that 4G of 32-bit virtual memory space is short. So + you'll have to pick your own choice - depending on the application + load of your box. A 2G/2G split is typically a good choice for a + generic Linux server with lots of RAM. + + Any potentially remaining (not permanently mapped) part of physical + memory is called 'high memory'. How much total high memory the kernel + can handle is influenced by the (next) High Memory configuration option. + + The actual amount of total physical memory will either be + autodetected or can be forced by using a kernel command line option + such as "mem=256M". (Try "man bootparam" or see the documentation of + your boot loader (lilo or loadlin) about how to pass options to the + kernel at boot time. The lilo procedure is also explained in the + SCSI-HOWTO, available from http://metalab.unc.edu/mdw/linux.html#howto .) 
Math emulation CONFIG_NWFPE @@ -12802,7 +12835,7 @@ CONFIG_KHTTPD # LocalWords: KERNNAME kname ktype kernelname Kerneltype KERNTYPE Alt RX mdafb # LocalWords: dataless kerneltype SYSNAME Comtrol Rocketport palmtop fbset EGS # LocalWords: nvram SYSRQ SysRq PrintScreen sysrq NVRAMs NvRAM Shortwave RTTY -# LocalWords: Sitor Amtor Pactor GTOR hayes TX TMOUT JFdocs BIGMEM DAC IRQ's +# LocalWords: Sitor Amtor Pactor GTOR hayes TX TMOUT JFdocs HIGHMEM DAC IRQ's # LocalWords: IDEPCI IDEDMA idedma PDC pdc TRM trm raidtools luthien nuclecu # LocalWords: unam mx miguel koobera uic EMUL solaris pp ieee lpsg co DMAs TOS # LocalWords: BLDCONFIG preloading jumperless BOOTINIT modutils multipath GRE diff --git a/arch/i386/config.in b/arch/i386/config.in index 529b648beeab..b8f081e65f00 100644 --- a/arch/i386/config.in +++ b/arch/i386/config.in @@ -42,6 +42,18 @@ if [ "$CONFIG_MK7" = "y" ]; then define_bool CONFIG_X86_USE_3DNOW y fi +choice 'High Memory Support' \ + "off CONFIG_NOHIGHMEM \ + 4GB CONFIG_HIGHMEM4G \ + 64GB CONFIG_HIGHMEM64G" off +if [ "$CONFIG_HIGHMEM4G" = "y" ]; then + define_bool CONFIG_HIGHMEM y +fi +if [ "$CONFIG_HIGHMEM64G" = "y" ]; then + define_bool CONFIG_HIGHMEM y + define_bool CONFIG_X86_PAE y +fi + bool 'Math emulation' CONFIG_MATH_EMULATION bool 'MTRR (Memory Type Range Register) support' CONFIG_MTRR bool 'Symmetric multi-processing support' CONFIG_SMP @@ -59,7 +71,6 @@ endmenu mainmenu_option next_comment comment 'General setup' -bool 'Support for over 1Gig of memory' CONFIG_BIGMEM bool 'Networking support' CONFIG_NET bool 'SGI Visual Workstation support' CONFIG_VISWS if [ "$CONFIG_VISWS" = "y" ]; then diff --git a/arch/i386/defconfig b/arch/i386/defconfig index 49137cda44ff..d6821a99333d 100644 --- a/arch/i386/defconfig +++ b/arch/i386/defconfig @@ -24,8 +24,9 @@ CONFIG_X86_BSWAP=y CONFIG_X86_POPAD_OK=y CONFIG_X86_TSC=y CONFIG_X86_GOOD_APIC=y -CONFIG_1GB=y -# CONFIG_2GB is not set +CONFIG_NOHIGHMEM=y +# CONFIG_HIGHMEM4G is not set +# CONFIG_HIGHMEM64G 
is not set # CONFIG_MATH_EMULATION is not set # CONFIG_MTRR is not set CONFIG_SMP=y @@ -40,7 +41,6 @@ CONFIG_MODULES=y # # General setup # -# CONFIG_BIGMEM is not set CONFIG_NET=y # CONFIG_VISWS is not set CONFIG_X86_IO_APIC=y @@ -111,7 +111,7 @@ CONFIG_BLK_DEV_IDEPCI=y # CONFIG_BLK_DEV_OFFBOARD is not set # CONFIG_BLK_DEV_AEC6210 is not set CONFIG_BLK_DEV_PIIX=y -# CONFIG_BLK_DEV_SIS5513 is not set +# CONFIG_BLK_DEV_PIIX_TUNING is not set # CONFIG_IDE_CHIPSETS is not set # CONFIG_BLK_CPQ_DA is not set diff --git a/arch/i386/kernel/head.S b/arch/i386/kernel/head.S index f1aa50586317..423308aae0b6 100644 --- a/arch/i386/kernel/head.S +++ b/arch/i386/kernel/head.S @@ -367,11 +367,13 @@ SYMBOL_NAME(gdt): .org 0x1000 ENTRY(swapper_pg_dir) .long 0x00102007 - .fill __USER_PGD_PTRS-1,4,0 - /* default: 767 entries */ + .long 0x00103007 + .fill BOOT_USER_PGD_PTRS-2,4,0 + /* default: 766 entries */ .long 0x00102007 - /* default: 255 entries */ - .fill __KERNEL_PGD_PTRS-1,4,0 + .long 0x00103007 + /* default: 254 entries */ + .fill BOOT_KERNEL_PGD_PTRS-2,4,0 /* * The page tables are initialized to only 4MB here - the final page @@ -509,16 +511,156 @@ ENTRY(pg0) .long 0x3f0007,0x3f1007,0x3f2007,0x3f3007,0x3f4007,0x3f5007,0x3f6007,0x3f7007 .long 0x3f8007,0x3f9007,0x3fa007,0x3fb007,0x3fc007,0x3fd007,0x3fe007,0x3ff007 -.org 0x3000 -ENTRY(empty_bad_page) - +ENTRY(pg1) + .long 0x400007,0x001007,0x002007,0x003007,0x004007,0x005007,0x006007,0x007007 + .long 0x408007,0x009007,0x00a007,0x00b007,0x00c007,0x00d007,0x00e007,0x00f007 + .long 0x410007,0x011007,0x012007,0x013007,0x014007,0x015007,0x016007,0x017007 + .long 0x418007,0x019007,0x01a007,0x01b007,0x01c007,0x01d007,0x01e007,0x01f007 + .long 0x420007,0x021007,0x022007,0x023007,0x024007,0x025007,0x026007,0x027007 + .long 0x428007,0x029007,0x02a007,0x02b007,0x02c007,0x02d007,0x02e007,0x02f007 + .long 0x430007,0x031007,0x032007,0x033007,0x034007,0x035007,0x036007,0x037007 + .long 
0x438007,0x039007,0x03a007,0x03b007,0x03c007,0x03d007,0x03e007,0x03f007 + .long 0x440007,0x041007,0x042007,0x043007,0x044007,0x045007,0x046007,0x047007 + .long 0x448007,0x049007,0x04a007,0x04b007,0x04c007,0x04d007,0x04e007,0x04f007 + .long 0x450007,0x051007,0x052007,0x053007,0x054007,0x055007,0x056007,0x057007 + .long 0x458007,0x059007,0x05a007,0x05b007,0x05c007,0x05d007,0x05e007,0x05f007 + .long 0x460007,0x061007,0x062007,0x063007,0x064007,0x065007,0x066007,0x067007 + .long 0x468007,0x069007,0x06a007,0x06b007,0x06c007,0x06d007,0x06e007,0x06f007 + .long 0x470007,0x071007,0x072007,0x073007,0x074007,0x075007,0x076007,0x077007 + .long 0x478007,0x079007,0x07a007,0x07b007,0x07c007,0x07d007,0x07e007,0x07f007 + .long 0x480007,0x081007,0x082007,0x083007,0x084007,0x085007,0x086007,0x087007 + .long 0x488007,0x089007,0x08a007,0x08b007,0x08c007,0x08d007,0x08e007,0x08f007 + .long 0x490007,0x091007,0x092007,0x093007,0x094007,0x095007,0x096007,0x097007 + .long 0x498007,0x099007,0x09a007,0x09b007,0x09c007,0x09d007,0x09e007,0x09f007 + .long 0x4a0007,0x0a1007,0x0a2007,0x0a3007,0x0a4007,0x0a5007,0x0a6007,0x0a7007 + .long 0x4a8007,0x0a9007,0x0aa007,0x0ab007,0x0ac007,0x0ad007,0x0ae007,0x0af007 + .long 0x4b0007,0x0b1007,0x0b2007,0x0b3007,0x0b4007,0x0b5007,0x0b6007,0x0b7007 + .long 0x4b8007,0x0b9007,0x0ba007,0x0bb007,0x0bc007,0x0bd007,0x0be007,0x0bf007 + .long 0x4c0007,0x0c1007,0x0c2007,0x0c3007,0x0c4007,0x0c5007,0x0c6007,0x0c7007 + .long 0x4c8007,0x0c9007,0x0ca007,0x0cb007,0x0cc007,0x0cd007,0x0ce007,0x0cf007 + .long 0x4d0007,0x0d1007,0x0d2007,0x0d3007,0x0d4007,0x0d5007,0x0d6007,0x0d7007 + .long 0x4d8007,0x0d9007,0x0da007,0x0db007,0x0dc007,0x0dd007,0x0de007,0x0df007 + .long 0x4e0007,0x0e1007,0x0e2007,0x0e3007,0x0e4007,0x0e5007,0x0e6007,0x0e7007 + .long 0x4e8007,0x0e9007,0x0ea007,0x0eb007,0x0ec007,0x0ed007,0x0ee007,0x0ef007 + .long 0x4f0007,0x0f1007,0x0f2007,0x0f3007,0x0f4007,0x0f5007,0x0f6007,0x0f7007 + .long 0x4f8007,0x0f9007,0x0fa007,0x0fb007,0x0fc007,0x0fd007,0x0fe007,0x0ff007 + .long 
0x500007,0x001007,0x002007,0x003007,0x004007,0x005007,0x006007,0x007007 + .long 0x508007,0x009007,0x00a007,0x00b007,0x00c007,0x00d007,0x00e007,0x00f007 + .long 0x510007,0x011007,0x012007,0x013007,0x014007,0x015007,0x016007,0x017007 + .long 0x518007,0x019007,0x01a007,0x01b007,0x01c007,0x01d007,0x01e007,0x01f007 + .long 0x520007,0x021007,0x022007,0x023007,0x024007,0x025007,0x026007,0x027007 + .long 0x528007,0x029007,0x02a007,0x02b007,0x02c007,0x02d007,0x02e007,0x02f007 + .long 0x530007,0x031007,0x032007,0x033007,0x034007,0x035007,0x036007,0x037007 + .long 0x538007,0x039007,0x03a007,0x03b007,0x03c007,0x03d007,0x03e007,0x03f007 + .long 0x540007,0x041007,0x042007,0x043007,0x044007,0x045007,0x046007,0x047007 + .long 0x548007,0x049007,0x04a007,0x04b007,0x04c007,0x04d007,0x04e007,0x04f007 + .long 0x550007,0x051007,0x052007,0x053007,0x054007,0x055007,0x056007,0x057007 + .long 0x558007,0x059007,0x05a007,0x05b007,0x05c007,0x05d007,0x05e007,0x05f007 + .long 0x560007,0x061007,0x062007,0x063007,0x064007,0x065007,0x066007,0x067007 + .long 0x568007,0x069007,0x06a007,0x06b007,0x06c007,0x06d007,0x06e007,0x06f007 + .long 0x570007,0x071007,0x072007,0x073007,0x074007,0x075007,0x076007,0x077007 + .long 0x578007,0x079007,0x07a007,0x07b007,0x07c007,0x07d007,0x07e007,0x07f007 + .long 0x580007,0x081007,0x082007,0x083007,0x084007,0x085007,0x086007,0x087007 + .long 0x588007,0x089007,0x08a007,0x08b007,0x08c007,0x08d007,0x08e007,0x08f007 + .long 0x590007,0x091007,0x092007,0x093007,0x094007,0x095007,0x096007,0x097007 + .long 0x598007,0x099007,0x09a007,0x09b007,0x09c007,0x09d007,0x09e007,0x09f007 + .long 0x5a0007,0x0a1007,0x0a2007,0x0a3007,0x0a4007,0x0a5007,0x0a6007,0x0a7007 + .long 0x5a8007,0x0a9007,0x0aa007,0x0ab007,0x0ac007,0x0ad007,0x0ae007,0x0af007 + .long 0x5b0007,0x0b1007,0x0b2007,0x0b3007,0x0b4007,0x0b5007,0x0b6007,0x0b7007 + .long 0x5b8007,0x0b9007,0x0ba007,0x0bb007,0x0bc007,0x0bd007,0x0be007,0x0bf007 + .long 0x5c0007,0x0c1007,0x0c2007,0x0c3007,0x0c4007,0x0c5007,0x0c6007,0x0c7007 + .long 
0x5c8007,0x0c9007,0x0ca007,0x0cb007,0x0cc007,0x0cd007,0x0ce007,0x0cf007 + .long 0x5d0007,0x0d1007,0x0d2007,0x0d3007,0x0d4007,0x0d5007,0x0d6007,0x0d7007 + .long 0x5d8007,0x0d9007,0x0da007,0x0db007,0x0dc007,0x0dd007,0x0de007,0x0df007 + .long 0x5e0007,0x0e1007,0x0e2007,0x0e3007,0x0e4007,0x0e5007,0x0e6007,0x0e7007 + .long 0x5e8007,0x0e9007,0x0ea007,0x0eb007,0x0ec007,0x0ed007,0x0ee007,0x0ef007 + .long 0x5f0007,0x0f1007,0x0f2007,0x0f3007,0x0f4007,0x0f5007,0x0f6007,0x0f7007 + .long 0x5f8007,0x0f9007,0x0fa007,0x0fb007,0x0fc007,0x0fd007,0x0fe007,0x0ff007 + .long 0x600007,0x001007,0x002007,0x003007,0x004007,0x005007,0x006007,0x007007 + .long 0x608007,0x009007,0x00a007,0x00b007,0x00c007,0x00d007,0x00e007,0x00f007 + .long 0x610007,0x011007,0x012007,0x013007,0x014007,0x015007,0x016007,0x017007 + .long 0x618007,0x019007,0x01a007,0x01b007,0x01c007,0x01d007,0x01e007,0x01f007 + .long 0x620007,0x021007,0x022007,0x023007,0x024007,0x025007,0x026007,0x027007 + .long 0x628007,0x029007,0x02a007,0x02b007,0x02c007,0x02d007,0x02e007,0x02f007 + .long 0x630007,0x031007,0x032007,0x033007,0x034007,0x035007,0x036007,0x037007 + .long 0x638007,0x039007,0x03a007,0x03b007,0x03c007,0x03d007,0x03e007,0x03f007 + .long 0x640007,0x041007,0x042007,0x043007,0x044007,0x045007,0x046007,0x047007 + .long 0x648007,0x049007,0x04a007,0x04b007,0x04c007,0x04d007,0x04e007,0x04f007 + .long 0x650007,0x051007,0x052007,0x053007,0x054007,0x055007,0x056007,0x057007 + .long 0x658007,0x059007,0x05a007,0x05b007,0x05c007,0x05d007,0x05e007,0x05f007 + .long 0x660007,0x061007,0x062007,0x063007,0x064007,0x065007,0x066007,0x067007 + .long 0x668007,0x069007,0x06a007,0x06b007,0x06c007,0x06d007,0x06e007,0x06f007 + .long 0x670007,0x071007,0x072007,0x073007,0x074007,0x075007,0x076007,0x077007 + .long 0x678007,0x079007,0x07a007,0x07b007,0x07c007,0x07d007,0x07e007,0x07f007 + .long 0x680007,0x081007,0x082007,0x083007,0x084007,0x085007,0x086007,0x087007 + .long 0x688007,0x089007,0x08a007,0x08b007,0x08c007,0x08d007,0x08e007,0x08f007 + .long 
0x690007,0x091007,0x092007,0x093007,0x094007,0x095007,0x096007,0x097007 + .long 0x698007,0x099007,0x09a007,0x09b007,0x09c007,0x09d007,0x09e007,0x09f007 + .long 0x6a0007,0x0a1007,0x0a2007,0x0a3007,0x0a4007,0x0a5007,0x0a6007,0x0a7007 + .long 0x6a8007,0x0a9007,0x0aa007,0x0ab007,0x0ac007,0x0ad007,0x0ae007,0x0af007 + .long 0x6b0007,0x0b1007,0x0b2007,0x0b3007,0x0b4007,0x0b5007,0x0b6007,0x0b7007 + .long 0x6b8007,0x0b9007,0x0ba007,0x0bb007,0x0bc007,0x0bd007,0x0be007,0x0bf007 + .long 0x6c0007,0x0c1007,0x0c2007,0x0c3007,0x0c4007,0x0c5007,0x0c6007,0x0c7007 + .long 0x6c8007,0x0c9007,0x0ca007,0x0cb007,0x0cc007,0x0cd007,0x0ce007,0x0cf007 + .long 0x6d0007,0x0d1007,0x0d2007,0x0d3007,0x0d4007,0x0d5007,0x0d6007,0x0d7007 + .long 0x6d8007,0x0d9007,0x0da007,0x0db007,0x0dc007,0x0dd007,0x0de007,0x0df007 + .long 0x6e0007,0x0e1007,0x0e2007,0x0e3007,0x0e4007,0x0e5007,0x0e6007,0x0e7007 + .long 0x6e8007,0x0e9007,0x0ea007,0x0eb007,0x0ec007,0x0ed007,0x0ee007,0x0ef007 + .long 0x6f0007,0x0f1007,0x0f2007,0x0f3007,0x0f4007,0x0f5007,0x0f6007,0x0f7007 + .long 0x6f8007,0x0f9007,0x0fa007,0x0fb007,0x0fc007,0x0fd007,0x0fe007,0x0ff007 + .long 0x700007,0x001007,0x002007,0x003007,0x004007,0x005007,0x006007,0x007007 + .long 0x708007,0x009007,0x00a007,0x00b007,0x00c007,0x00d007,0x00e007,0x00f007 + .long 0x710007,0x011007,0x012007,0x013007,0x014007,0x015007,0x016007,0x017007 + .long 0x718007,0x019007,0x01a007,0x01b007,0x01c007,0x01d007,0x01e007,0x01f007 + .long 0x720007,0x021007,0x022007,0x023007,0x024007,0x025007,0x026007,0x027007 + .long 0x728007,0x029007,0x02a007,0x02b007,0x02c007,0x02d007,0x02e007,0x02f007 + .long 0x730007,0x031007,0x032007,0x033007,0x034007,0x035007,0x036007,0x037007 + .long 0x738007,0x039007,0x03a007,0x03b007,0x03c007,0x03d007,0x03e007,0x03f007 + .long 0x740007,0x041007,0x042007,0x043007,0x044007,0x045007,0x046007,0x047007 + .long 0x748007,0x049007,0x04a007,0x04b007,0x04c007,0x04d007,0x04e007,0x04f007 + .long 0x750007,0x051007,0x052007,0x053007,0x054007,0x055007,0x056007,0x057007 + .long 
0x758007,0x059007,0x05a007,0x05b007,0x05c007,0x05d007,0x05e007,0x05f007 + .long 0x760007,0x061007,0x062007,0x063007,0x064007,0x065007,0x066007,0x067007 + .long 0x768007,0x069007,0x06a007,0x06b007,0x06c007,0x06d007,0x06e007,0x06f007 + .long 0x770007,0x071007,0x072007,0x073007,0x074007,0x075007,0x076007,0x077007 + .long 0x778007,0x079007,0x07a007,0x07b007,0x07c007,0x07d007,0x07e007,0x07f007 + .long 0x780007,0x081007,0x082007,0x083007,0x084007,0x085007,0x086007,0x087007 + .long 0x788007,0x089007,0x08a007,0x08b007,0x08c007,0x08d007,0x08e007,0x08f007 + .long 0x790007,0x091007,0x092007,0x093007,0x094007,0x095007,0x096007,0x097007 + .long 0x798007,0x099007,0x09a007,0x09b007,0x09c007,0x09d007,0x09e007,0x09f007 + .long 0x7a0007,0x0a1007,0x0a2007,0x0a3007,0x0a4007,0x0a5007,0x0a6007,0x0a7007 + .long 0x7a8007,0x0a9007,0x0aa007,0x0ab007,0x0ac007,0x0ad007,0x0ae007,0x0af007 + .long 0x7b0007,0x0b1007,0x0b2007,0x0b3007,0x0b4007,0x0b5007,0x0b6007,0x0b7007 + .long 0x7b8007,0x0b9007,0x0ba007,0x0bb007,0x0bc007,0x0bd007,0x0be007,0x0bf007 + .long 0x7c0007,0x0c1007,0x0c2007,0x0c3007,0x0c4007,0x0c5007,0x0c6007,0x0c7007 + .long 0x7c8007,0x0c9007,0x0ca007,0x0cb007,0x0cc007,0x0cd007,0x0ce007,0x0cf007 + .long 0x7d0007,0x0d1007,0x0d2007,0x0d3007,0x0d4007,0x0d5007,0x0d6007,0x0d7007 + .long 0x7d8007,0x0d9007,0x0da007,0x0db007,0x0dc007,0x0dd007,0x0de007,0x0df007 + .long 0x7e0007,0x0e1007,0x0e2007,0x0e3007,0x0e4007,0x0e5007,0x0e6007,0x0e7007 + .long 0x7e8007,0x0e9007,0x0ea007,0x0eb007,0x0ec007,0x0ed007,0x0ee007,0x0ef007 + .long 0x7f0007,0x0f1007,0x0f2007,0x0f3007,0x0f4007,0x0f5007,0x0f6007,0x0f7007 + .long 0x7f8007,0x0f9007,0x0fa007,0x0fb007,0x0fc007,0x0fd007,0x0fe007,0x0ff007 .org 0x4000 -ENTRY(empty_bad_page_table) +ENTRY(empty_zero_page) .org 0x5000 -ENTRY(empty_zero_page) +ENTRY(empty_bad_page) .org 0x6000 +ENTRY(empty_bad_pte_table) + +#if CONFIG_X86_PAE + + .org 0x7000 + ENTRY(empty_bad_pmd_table) + + .org 0x8000 + +#else + + .org 0x7000 + +#endif /* * This starts the data section. 
Note that the above is all diff --git a/arch/i386/kernel/irq.c b/arch/i386/kernel/irq.c index 8ec329287122..75659aac40dc 100644 --- a/arch/i386/kernel/irq.c +++ b/arch/i386/kernel/irq.c @@ -20,6 +20,7 @@ * Naturally it's not a 1:1 relation, but there are similarities. */ +#include #include #include #include diff --git a/arch/i386/kernel/setup.c b/arch/i386/kernel/setup.c index 734cfca65e84..2ddad6ff6d73 100644 --- a/arch/i386/kernel/setup.c +++ b/arch/i386/kernel/setup.c @@ -54,7 +54,8 @@ #ifdef CONFIG_BLK_DEV_RAM #include #endif -#include +#include +#include #include #include #include @@ -403,10 +404,9 @@ void __init add_memory_region(unsigned long start, #define LOWMEMSIZE() ((*(unsigned short *)__va(0x413)) * 1024) - void __init setup_memory_region(void) { -#define E820_DEBUG 0 +#define E820_DEBUG 1 #ifdef E820_DEBUG int i; #endif @@ -432,9 +432,8 @@ void __init setup_memory_region(void) memcpy(e820.map, E820_MAP, e820.nr_map * sizeof e820.map[0]); #ifdef E820_DEBUG for (i=0; i < e820.nr_map; i++) { - printk("e820: %ld @ %08lx ", - (unsigned long)(e820.map[i].size), - (unsigned long)(e820.map[i].addr)); + printk("e820: %08x @ %08x ", (int)e820.map[i].size, + (int)e820.map[i].addr); switch (e820.map[i].type) { case E820_RAM: printk("(usable)\n"); break; @@ -464,48 +463,11 @@ void __init setup_memory_region(void) } /* setup_memory_region */ -void __init setup_arch(char **cmdline_p, unsigned long * memory_start_p, unsigned long * memory_end_p) +static inline void parse_mem_cmdline (char ** cmdline_p) { - unsigned long high_pfn, max_pfn; char c = ' ', *to = command_line, *from = COMMAND_LINE; int len = 0; - int i; - int usermem=0; - -#ifdef CONFIG_VISWS - visws_get_board_type_and_rev(); -#endif - - ROOT_DEV = to_kdev_t(ORIG_ROOT_DEV); - drive_info = DRIVE_INFO; - screen_info = SCREEN_INFO; - apm_bios_info = APM_BIOS_INFO; - if( SYS_DESC_TABLE.length != 0 ) { - MCA_bus = SYS_DESC_TABLE.table[3] &0x2; - machine_id = SYS_DESC_TABLE.table[0]; - machine_submodel_id = 
SYS_DESC_TABLE.table[1]; - BIOS_revision = SYS_DESC_TABLE.table[2]; - } - aux_device_present = AUX_DEVICE_INFO; - -#ifdef CONFIG_BLK_DEV_RAM - rd_image_start = RAMDISK_FLAGS & RAMDISK_IMAGE_START_MASK; - rd_prompt = ((RAMDISK_FLAGS & RAMDISK_PROMPT_FLAG) != 0); - rd_doload = ((RAMDISK_FLAGS & RAMDISK_LOAD_FLAG) != 0); -#endif - setup_memory_region(); - - if (!MOUNT_ROOT_RDONLY) - root_mountflags &= ~MS_RDONLY; - init_mm.start_code = (unsigned long) &_text; - init_mm.end_code = (unsigned long) &_etext; - init_mm.end_data = (unsigned long) &_edata; - init_mm.brk = (unsigned long) &_end; - - code_resource.start = virt_to_bus(&_text); - code_resource.end = virt_to_bus(&_etext)-1; - data_resource.start = virt_to_bus(&_etext); - data_resource.end = virt_to_bus(&_edata)-1; + int usermem = 0; /* Save unparsed command line copy for /proc/cmdline */ memcpy(saved_command_line, COMMAND_LINE, COMMAND_LINE_SIZE); @@ -519,8 +481,9 @@ void __init setup_arch(char **cmdline_p, unsigned long * memory_start_p, unsigne * "mem=XXX[KkmM]@XXX[KkmM]" defines a memory region from * to +, overriding the bios size. */ - if (c == ' ' && *(const unsigned long *)from == *(const unsigned long *)"mem=") { - if (to != command_line) to--; + if (c == ' ' && !memcmp(from, "mem=", 4)) { + if (to != command_line) + to--; if (!memcmp(from+4, "nopentium", 9)) { from += 9+4; boot_cpu_data.x86_capability &= ~X86_FEATURE_PSE; @@ -542,7 +505,7 @@ void __init setup_arch(char **cmdline_p, unsigned long * memory_start_p, unsigne } mem_size = memparse(from+4, &from); if (*from == '@') - start_at = memparse(from+1,&from); + start_at = memparse(from+1, &from); else { start_at = HIGH_MEMORY; mem_size -= HIGH_MEMORY; @@ -559,54 +522,158 @@ void __init setup_arch(char **cmdline_p, unsigned long * memory_start_p, unsigne } *to = '\0'; *cmdline_p = command_line; +} - /* Find the highest page frame number we have available */ - max_pfn = 0; - for (i=0; i < e820.nr_map; i++) { - /* RAM? 
*/ - if (e820.map[i].type == E820_RAM) { - unsigned long end_pfn = (e820.map[i].addr + e820.map[i].size) >> PAGE_SHIFT; +void __init setup_arch(char **cmdline_p) +{ + unsigned long bootmap_size; + unsigned long start_pfn, max_pfn, max_low_pfn; + int i; - if (end_pfn > max_pfn) - max_pfn = end_pfn; - } +#ifdef CONFIG_VISWS + visws_get_board_type_and_rev(); +#endif + + ROOT_DEV = to_kdev_t(ORIG_ROOT_DEV); + drive_info = DRIVE_INFO; + screen_info = SCREEN_INFO; + apm_bios_info = APM_BIOS_INFO; + if( SYS_DESC_TABLE.length != 0 ) { + MCA_bus = SYS_DESC_TABLE.table[3] &0x2; + machine_id = SYS_DESC_TABLE.table[0]; + machine_submodel_id = SYS_DESC_TABLE.table[1]; + BIOS_revision = SYS_DESC_TABLE.table[2]; } + aux_device_present = AUX_DEVICE_INFO; -/* - * We can only allocate a limited amount of direct-mapped memory - */ -#define VMALLOC_RESERVE (128 << 20) /* 128MB for vmalloc and initrd */ -#define MAXMEM ((unsigned long)(-PAGE_OFFSET-VMALLOC_RESERVE)) -#define MAXMEM_PFN (MAXMEM >> PAGE_SHIFT) +#ifdef CONFIG_BLK_DEV_RAM + rd_image_start = RAMDISK_FLAGS & RAMDISK_IMAGE_START_MASK; + rd_prompt = ((RAMDISK_FLAGS & RAMDISK_PROMPT_FLAG) != 0); + rd_doload = ((RAMDISK_FLAGS & RAMDISK_LOAD_FLAG) != 0); +#endif + setup_memory_region(); - high_pfn = MAXMEM_PFN; - if (max_pfn < high_pfn) - high_pfn = max_pfn; + if (!MOUNT_ROOT_RDONLY) + root_mountflags &= ~MS_RDONLY; + init_mm.start_code = (unsigned long) &_text; + init_mm.end_code = (unsigned long) &_etext; + init_mm.end_data = (unsigned long) &_edata; + init_mm.brk = (unsigned long) &_end; + + code_resource.start = virt_to_bus(&_text); + code_resource.end = virt_to_bus(&_etext)-1; + data_resource.start = virt_to_bus(&_etext); + data_resource.end = virt_to_bus(&_edata)-1; + + parse_mem_cmdline(cmdline_p); + +#define PFN_UP(x) (((x) + PAGE_SIZE-1) >> PAGE_SHIFT) +#define PFN_DOWN(x) ((x) >> PAGE_SHIFT) +#define PFN_PHYS(x) ((x) << PAGE_SHIFT) /* - * But the bigmem stuff may be able to use more of it - * (but currently only up to 
about 4GB) + * 128MB for vmalloc and initrd */ -#ifdef CONFIG_BIGMEM - #define MAXBIGMEM ((unsigned long)(~(VMALLOC_RESERVE-1))) - #define MAXBIGMEM_PFN (MAXBIGMEM >> PAGE_SHIFT) - if (max_pfn > MAX_PFN) - max_pfn = MAX_PFN; - -/* When debugging, make half of "normal" memory be BIGMEM memory instead */ -#ifdef BIGMEM_DEBUG - high_pfn >>= 1; -#endif +#define VMALLOC_RESERVE (unsigned long)(128 << 20) +#define MAXMEM (unsigned long)(-PAGE_OFFSET-VMALLOC_RESERVE) +#define MAXMEM_PFN PFN_DOWN(MAXMEM) + + /* + * partially used pages are not usable - thus + * we are rounding upwards: + */ + start_pfn = PFN_UP(__pa(&_end)); + + /* + * Find the highest page frame number we have available + */ + max_pfn = 0; + for (i = 0; i < e820.nr_map; i++) { + unsigned long curr_pfn; + /* RAM? */ + if (e820.map[i].type != E820_RAM) + continue; + curr_pfn = PFN_DOWN(e820.map[i].addr + e820.map[i].size); + if (curr_pfn > max_pfn) + max_pfn = curr_pfn; + } - bigmem_start = high_pfn << PAGE_SHIFT; - bigmem_end = max_pfn << PAGE_SHIFT; - printk(KERN_NOTICE "%ldMB BIGMEM available.\n", (bigmem_end-bigmem_start) >> 20); + /* + * Determine low and high memory ranges: + */ + max_low_pfn = max_pfn; + if (max_low_pfn > MAXMEM_PFN) + max_low_pfn = MAXMEM_PFN; + +#ifdef CONFIG_HIGHMEM + highstart_pfn = highend_pfn = max_pfn; + if (max_pfn > MAXMEM_PFN) { + highstart_pfn = MAXMEM_PFN; + highend_pfn = max_pfn; + printk(KERN_NOTICE "%ldMB HIGHMEM available.\n", + pages_to_mb(highend_pfn - highstart_pfn)); + } #endif + /* + * Initialize the boot-time allocator (with low memory only): + */ + bootmap_size = init_bootmem(start_pfn, max_low_pfn); - ram_resources[1].end = (high_pfn << PAGE_SHIFT)-1; + /* + * FIXME: what about high memory? + */ + ram_resources[1].end = PFN_PHYS(max_low_pfn); - *memory_start_p = (unsigned long) &_end; - *memory_end_p = PAGE_OFFSET + (high_pfn << PAGE_SHIFT); + /* + * Register fully available low RAM pages with the bootmem allocator. 
+ */ + for (i = 0; i < e820.nr_map; i++) { + unsigned long curr_pfn, last_pfn, size; + /* + * Reserve usable low memory + */ + if (e820.map[i].type != E820_RAM) + continue; + /* + * We are rounding up the start address of usable memory: + */ + curr_pfn = PFN_UP(e820.map[i].addr); + if (curr_pfn >= max_low_pfn) + continue; + /* + * ... and at the end of the usable range downwards: + */ + last_pfn = PFN_DOWN(e820.map[i].addr + e820.map[i].size); + + if (last_pfn > max_low_pfn) + last_pfn = max_low_pfn; + size = last_pfn - curr_pfn; + free_bootmem(PFN_PHYS(curr_pfn), PFN_PHYS(size)); + } + /* + * Reserve the bootmem bitmap itself as well. We do this in two + * steps (first step was init_bootmem()) because this catches + * the (very unlikely) case of us accidentally initializing the + * bootmem allocator with an invalid RAM area. + */ + reserve_bootmem(HIGH_MEMORY, (PFN_PHYS(start_pfn) + + bootmap_size + PAGE_SIZE-1) - (HIGH_MEMORY)); + + /* + * reserve physical page 0 - it's a special BIOS page on many boxes, + * enabling clean reboots, SMP operation, laptop functions. + */ + reserve_bootmem(0, PAGE_SIZE); + +#ifdef __SMP__ + /* + * But first pinch a few for the stack/trampoline stuff + * FIXME: Don't need the extra page at 4K, but need to fix + * trampoline before removing it. (see the GDT stuff) + */ + reserve_bootmem(PAGE_SIZE, PAGE_SIZE); + smp_alloc_memory(); /* AP processor realmode stacks in low memory*/ +#endif #ifdef __SMP__ /* @@ -616,10 +683,11 @@ void __init setup_arch(char **cmdline_p, unsigned long * memory_start_p, unsigne #endif #ifdef CONFIG_BLK_DEV_INITRD +// FIXME needs to do the new bootmem alloc stuff if (LOADER_TYPE) { initrd_start = INITRD_START ? 
INITRD_START + PAGE_OFFSET : 0; initrd_end = initrd_start+INITRD_SIZE; - if (initrd_end > memory_end) { + if (initrd_end > (max_low_pfn << PAGE_SHIFT)) { printk("initrd extends beyond end of memory " "(0x%08lx > 0x%08lx)\ndisabling initrd\n", initrd_end,memory_end); diff --git a/arch/i386/kernel/smpboot.c b/arch/i386/kernel/smpboot.c index 46335ee8fa9c..f0b3b371e2ea 100644 --- a/arch/i386/kernel/smpboot.c +++ b/arch/i386/kernel/smpboot.c @@ -39,6 +39,7 @@ #include #include #include +#include #include #include @@ -630,12 +631,15 @@ static unsigned long __init setup_trampoline(void) * We are called very early to get the low memory for the * SMP bootup trampoline page. */ -unsigned long __init smp_alloc_memory(unsigned long mem_base) +void __init smp_alloc_memory(void) { - if (virt_to_phys((void *)mem_base) >= 0x9F000) + trampoline_base = (void *) alloc_bootmem_pages(PAGE_SIZE); + /* + * Has to be in very low memory so we can execute + * real-mode AP code. + */ + if (__pa(trampoline_base) >= 0x9F000) BUG(); - trampoline_base = (void *)mem_base; - return mem_base + PAGE_SIZE; } /* @@ -804,11 +808,10 @@ void __init setup_local_APIC(void) apic_write(APIC_DFR, value); } -unsigned long __init init_smp_mappings(unsigned long memory_start) +void __init init_smp_mappings(void) { unsigned long apic_phys; - memory_start = PAGE_ALIGN(memory_start); if (smp_found_config) { apic_phys = mp_lapic_addr; } else { @@ -818,11 +821,10 @@ unsigned long __init init_smp_mappings(unsigned long memory_start) * could use the real zero-page, but it's safer * this way if some buggy code writes to this page ... 
*/ - apic_phys = __pa(memory_start); - memset((void *)memory_start, 0, PAGE_SIZE); - memory_start += PAGE_SIZE; + apic_phys = __pa(alloc_bootmem_pages(PAGE_SIZE)); + memset((void *)apic_phys, 0, PAGE_SIZE); } - set_fixmap(FIX_APIC_BASE,apic_phys); + set_fixmap(FIX_APIC_BASE, apic_phys); dprintk("mapped APIC to %08lx (%08lx)\n", APIC_BASE, apic_phys); #ifdef CONFIG_X86_IO_APIC @@ -834,9 +836,8 @@ unsigned long __init init_smp_mappings(unsigned long memory_start) if (smp_found_config) { ioapic_phys = mp_ioapics[i].mpc_apicaddr; } else { - ioapic_phys = __pa(memory_start); - memset((void *)memory_start, 0, PAGE_SIZE); - memory_start += PAGE_SIZE; + ioapic_phys = __pa(alloc_bootmem_pages(PAGE_SIZE)); + memset((void *)ioapic_phys, 0, PAGE_SIZE); } set_fixmap(idx,ioapic_phys); dprintk("mapped IOAPIC to %08lx (%08lx)\n", @@ -845,8 +846,6 @@ unsigned long __init init_smp_mappings(unsigned long memory_start) } } #endif - - return memory_start; } /* @@ -1112,6 +1111,12 @@ int __init start_secondary(void *unused) smp_callin(); while (!atomic_read(&smp_commenced)) /* nothing */ ; + /* + * low-memory mappings have been cleared, flush them from + * the local TLBs too. + */ + local_flush_tlb(); + return cpu_idle(); } @@ -1153,7 +1158,6 @@ static int __init fork_by_hand(void) static void __init do_boot_cpu(int i) { unsigned long cfg; - pgd_t maincfg; struct task_struct *idle; unsigned long send_status, accept_status; int timeout, num_starts, j; @@ -1207,9 +1211,6 @@ static void __init do_boot_cpu(int i) *((volatile unsigned short *) phys_to_virt(0x467)) = start_eip & 0xf; dprintk("3.\n"); - maincfg=swapper_pg_dir[0]; - ((unsigned long *)swapper_pg_dir)[0]=0x102007; - /* * Be paranoid about clearing APIC errors. 
*/ @@ -1367,9 +1368,6 @@ static void __init do_boot_cpu(int i) cpucount--; } - swapper_pg_dir[0]=maincfg; - local_flush_tlb(); - /* mark "stuck" area as not stuck */ *((volatile unsigned long *)phys_to_virt(8192)) = 0; } @@ -1567,14 +1565,9 @@ void __init smp_boot_cpus(void) #ifndef CONFIG_VISWS { - unsigned long cfg; - /* * Install writable page 0 entry to set BIOS data area. */ - cfg = pg0[0]; - /* writeable, present, addr 0 */ - pg0[0] = _PAGE_RW | _PAGE_PRESENT | 0; local_flush_tlb(); /* @@ -1584,12 +1577,6 @@ void __init smp_boot_cpus(void) CMOS_WRITE(0, 0xf); *((volatile long *) phys_to_virt(0x467)) = 0; - - /* - * Restore old page 0 entry. - */ - pg0[0] = cfg; - local_flush_tlb(); } #endif @@ -1646,5 +1633,7 @@ smp_done: */ if (cpu_has_tsc && cpucount) synchronize_tsc_bp(); + + zap_low_mappings(); } diff --git a/arch/i386/kernel/traps.c b/arch/i386/kernel/traps.c index ebd1cd002c09..f66f2363c331 100644 --- a/arch/i386/kernel/traps.c +++ b/arch/i386/kernel/traps.c @@ -581,6 +581,7 @@ asmlinkage void math_emulate(long arg) #endif /* CONFIG_MATH_EMULATION */ +#ifndef CONFIG_M686 void __init trap_init_f00f_bug(void) { unsigned long page; @@ -596,8 +597,8 @@ void __init trap_init_f00f_bug(void) pgd = pgd_offset(&init_mm, page); pmd = pmd_offset(pgd, page); pte = pte_offset(pmd, page); - free_page(pte_page(*pte)); - *pte = mk_pte(&idt_table, PAGE_KERNEL_RO); + __free_page(pte_page(*pte)); + *pte = mk_pte_phys(__pa(&idt_table), PAGE_KERNEL_RO); local_flush_tlb(); /* @@ -608,6 +609,7 @@ void __init trap_init_f00f_bug(void) idt = (struct desc_struct *)page; __asm__ __volatile__("lidt %0": "=m" (idt_descr)); } +#endif #define _set_gate(gate_addr,type,dpl,addr) \ do { \ @@ -772,7 +774,7 @@ cobalt_init(void) #endif void __init trap_init(void) { - if (readl(0x0FFFD9) == 'E' + ('I'<<8) + ('S'<<16) + ('A'<<24)) + if (isa_readl(0x0FFFD9) == 'E'+('I'<<8)+('S'<<16)+('A'<<24)) EISA_bus = 1; set_trap_gate(0,&divide_error); diff --git a/arch/i386/kernel/vm86.c
b/arch/i386/kernel/vm86.c index 65dd7e9da921..3fd5262ac5ed 100644 --- a/arch/i386/kernel/vm86.c +++ b/arch/i386/kernel/vm86.c @@ -102,7 +102,7 @@ static void mark_screen_rdonly(struct task_struct * tsk) if (pgd_none(*pgd)) return; if (pgd_bad(*pgd)) { - printk("vm86: bad pgd entry [%p]:%08lx\n", pgd, pgd_val(*pgd)); + pgd_ERROR(*pgd); pgd_clear(pgd); return; } @@ -110,7 +110,7 @@ static void mark_screen_rdonly(struct task_struct * tsk) if (pmd_none(*pmd)) return; if (pmd_bad(*pmd)) { - printk("vm86: bad pmd entry [%p]:%08lx\n", pmd, pmd_val(*pmd)); + pmd_ERROR(*pmd); pmd_clear(pmd); return; } diff --git a/arch/i386/mm/Makefile b/arch/i386/mm/Makefile index d60bc196923f..cee7d4e6d129 100644 --- a/arch/i386/mm/Makefile +++ b/arch/i386/mm/Makefile @@ -10,8 +10,4 @@ O_TARGET := mm.o O_OBJS := init.o fault.o ioremap.o extable.o -ifeq ($(CONFIG_BIGMEM),y) -O_OBJS += bigmem.o -endif - include $(TOPDIR)/Rules.make diff --git a/arch/i386/mm/bigmem.c b/arch/i386/mm/bigmem.c deleted file mode 100644 index 8da077927cf8..000000000000 --- a/arch/i386/mm/bigmem.c +++ /dev/null @@ -1,33 +0,0 @@ -/* - * BIGMEM IA32 code and variables. - * - * (C) 1999 Andrea Arcangeli, SuSE GmbH, andrea@suse.de - * Gerhard Wichert, Siemens AG, Gerhard.Wichert@pdb.siemens.de - */ - -#include -#include - -unsigned long bigmem_start, bigmem_end; - -/* NOTE: fixmap_init alloc all the fixmap pagetables contigous on the - physical space so we can cache the place of the first one and move - around without checking the pgd every time. 
*/ -pte_t *kmap_pte; -pgprot_t kmap_prot; - -#define kmap_get_fixmap_pte(vaddr) \ - pte_offset(pmd_offset(pgd_offset_k(vaddr), (vaddr)), (vaddr)) - -void __init kmap_init(void) -{ - unsigned long kmap_vstart; - - /* cache the first kmap pte */ - kmap_vstart = __fix_to_virt(FIX_KMAP_BEGIN); - kmap_pte = kmap_get_fixmap_pte(kmap_vstart); - - kmap_prot = PAGE_KERNEL; - if (boot_cpu_data.x86_capability & X86_FEATURE_PGE) - pgprot_val(kmap_prot) |= _PAGE_GLOBAL; -} diff --git a/arch/i386/mm/fault.c b/arch/i386/mm/fault.c index 1f787900511e..b2a98859b7ca 100644 --- a/arch/i386/mm/fault.c +++ b/arch/i386/mm/fault.c @@ -76,6 +76,31 @@ bad_area: return 0; } +static inline void handle_wp_test (void) +{ + const unsigned long vaddr = PAGE_OFFSET; + pgd_t *pgd; + pmd_t *pmd; + pte_t *pte; + + /* + * make it read/writable temporarily, so that the fault + * can be handled. + */ + pgd = swapper_pg_dir + __pgd_offset(vaddr); + pmd = pmd_offset(pgd, vaddr); + pte = pte_offset(pmd, vaddr); + *pte = mk_pte_phys(0, PAGE_KERNEL); + local_flush_tlb(); + + boot_cpu_data.wp_works_ok = 1; + /* + * Beware: Black magic here. The printk is needed here to flush + * CPU state on certain buggy processors. + */ + printk("Ok"); +} + asmlinkage void do_invalid_op(struct pt_regs *, unsigned long); extern unsigned long idt; @@ -226,15 +251,8 @@ no_context: * First we check if it was the bootup rw-test, though.. */ if (boot_cpu_data.wp_works_ok < 0 && - address == PAGE_OFFSET && (error_code & 1)) { - boot_cpu_data.wp_works_ok = 1; - pg0[0] = pte_val(mk_pte(PAGE_OFFSET, PAGE_KERNEL)); - local_flush_tlb(); - /* - * Beware: Black magic here. The printk is needed here to flush - * CPU state on certain buggy processors. 
- */ - printk("Ok"); + address == PAGE_OFFSET && (error_code & 1)) { + handle_wp_test(); return; } diff --git a/arch/i386/mm/init.c b/arch/i386/mm/init.c index 984488089738..87e53e13283c 100644 --- a/arch/i386/mm/init.c +++ b/arch/i386/mm/init.c @@ -22,7 +22,9 @@ #ifdef CONFIG_BLK_DEV_INITRD #include #endif -#include +#include +#include +#include #include #include @@ -32,22 +34,81 @@ #include #include -static unsigned long totalram = 0; -static unsigned long totalbig = 0; +unsigned long highstart_pfn, highend_pfn; +static unsigned long totalram_pages = 0; +static unsigned long totalhigh_pages = 0; extern void show_net_buffers(void); -extern unsigned long init_smp_mappings(unsigned long); -void __bad_pte_kernel(pmd_t *pmd) +/* + * BAD_PAGE is the page that is used for page faults when linux + * is out-of-memory. Older versions of linux just did a + * do_exit(), but using this instead means there is less risk + * for a process dying in kernel mode, possibly leaving an inode + * unused etc.. + * + * BAD_PAGETABLE is the accompanying page-table: it is initialized + * to point to BAD_PAGE entries. + * + * ZERO_PAGE is a special page that is used for zero-initialized + * data and COW. + */ + +/* + * These are allocated in head.S so that we get proper page alignment. + * If you change the size of these then change head.S as well. + */ +extern char empty_bad_page[PAGE_SIZE]; +#if CONFIG_X86_PAE +extern pmd_t empty_bad_pmd_table[PTRS_PER_PMD]; +#endif +extern pte_t empty_bad_pte_table[PTRS_PER_PTE]; + +/* + * We init them before every return and make them writable-shared. + * This guarantees we get out of the kernel in some more or less sane + * way. 
+ */ +#if CONFIG_X86_PAE +static pmd_t * get_bad_pmd_table(void) { - printk("Bad pmd in pte_alloc: %08lx\n", pmd_val(*pmd)); - pmd_val(*pmd) = _KERNPG_TABLE + __pa(BAD_PAGETABLE); + pmd_t v; + int i; + + pmd_val(v) = _PAGE_TABLE + __pa(empty_bad_pte_table); + + for (i = 0; i < PAGE_SIZE/sizeof(pmd_t); i++) + empty_bad_pmd_table[i] = v; + + return empty_bad_pmd_table; } +#endif -void __bad_pte(pmd_t *pmd) +static pte_t * get_bad_pte_table(void) { - printk("Bad pmd in pte_alloc: %08lx\n", pmd_val(*pmd)); - pmd_val(*pmd) = _PAGE_TABLE + __pa(BAD_PAGETABLE); + pte_t v; + int i; + + v = pte_mkdirty(mk_pte_phys(__pa(empty_bad_page), PAGE_SHARED)); + + for (i = 0; i < PAGE_SIZE/sizeof(pte_t); i++) + empty_bad_pte_table[i] = v; + + return empty_bad_pte_table; +} + + + +void __handle_bad_pmd(pmd_t *pmd) +{ + pmd_ERROR(*pmd); + pmd_val(*pmd) = _PAGE_TABLE + __pa(get_bad_pte_table()); +} + +void __handle_bad_pmd_kernel(pmd_t *pmd) +{ + pmd_ERROR(*pmd); + pmd_val(*pmd) = _KERNPG_TABLE + __pa(get_bad_pte_table()); } pte_t *get_pte_kernel_slow(pmd_t *pmd, unsigned long offset) @@ -57,16 +118,16 @@ pte_t *get_pte_kernel_slow(pmd_t *pmd, unsigned long offset) pte = (pte_t *) __get_free_page(GFP_KERNEL); if (pmd_none(*pmd)) { if (pte) { - clear_page((unsigned long)pte); + clear_page(pte); pmd_val(*pmd) = _KERNPG_TABLE + __pa(pte); return pte + offset; } - pmd_val(*pmd) = _KERNPG_TABLE + __pa(BAD_PAGETABLE); + pmd_val(*pmd) = _KERNPG_TABLE + __pa(get_bad_pte_table()); return NULL; } free_page((unsigned long)pte); if (pmd_bad(*pmd)) { - __bad_pte_kernel(pmd); + __handle_bad_pmd_kernel(pmd); return NULL; } return (pte_t *) pmd_page(*pmd) + offset; @@ -79,19 +140,19 @@ pte_t *get_pte_slow(pmd_t *pmd, unsigned long offset) pte = (unsigned long) __get_free_page(GFP_KERNEL); if (pmd_none(*pmd)) { if (pte) { - clear_page(pte); + clear_page((void *)pte); pmd_val(*pmd) = _PAGE_TABLE + __pa(pte); - return (pte_t *)(pte + offset); + return (pte_t *)pte + offset; } - pmd_val(*pmd) = _PAGE_TABLE 
+ __pa(BAD_PAGETABLE); + pmd_val(*pmd) = _PAGE_TABLE + __pa(get_bad_pte_table()); return NULL; } free_page(pte); if (pmd_bad(*pmd)) { - __bad_pte(pmd); + __handle_bad_pmd(pmd); return NULL; } - return (pte_t *) (pmd_page(*pmd) + offset); + return (pte_t *) pmd_page(*pmd) + offset; } int do_check_pgt_cache(int low, int high) @@ -110,52 +171,36 @@ int do_check_pgt_cache(int low, int high) return freed; } -/* - * BAD_PAGE is the page that is used for page faults when linux - * is out-of-memory. Older versions of linux just did a - * do_exit(), but using this instead means there is less risk - * for a process dying in kernel mode, possibly leaving an inode - * unused etc.. - * - * BAD_PAGETABLE is the accompanying page-table: it is initialized - * to point to BAD_PAGE entries. - * - * ZERO_PAGE is a special page that is used for zero-initialized - * data and COW. - */ -pte_t * __bad_pagetable(void) -{ - extern char empty_bad_page_table[PAGE_SIZE]; - int d0, d1; - - __asm__ __volatile__("cld ; rep ; stosl" - : "=&D" (d0), "=&c" (d1) - : "a" (pte_val(BAD_PAGE)), - "0" ((long) empty_bad_page_table), - "1" (PAGE_SIZE/4) - : "memory"); - return (pte_t *) empty_bad_page_table; -} +/* NOTE: fixmap_init alloc all the fixmap pagetables contigous on the + physical space so we can cache the place of the first one and move + around without checking the pgd every time. 
*/ + +#if CONFIG_HIGHMEM +pte_t *kmap_pte; +pgprot_t kmap_prot; -pte_t __bad_page(void) +#define kmap_get_fixmap_pte(vaddr) \ + pte_offset(pmd_offset(pgd_offset_k(vaddr), (vaddr)), (vaddr)) + +void __init kmap_init(void) { - extern char empty_bad_page[PAGE_SIZE]; - int d0, d1; - - __asm__ __volatile__("cld ; rep ; stosl" - : "=&D" (d0), "=&c" (d1) - : "a" (0), - "0" ((long) empty_bad_page), - "1" (PAGE_SIZE/4) - : "memory"); - return pte_mkdirty(mk_pte((unsigned long) empty_bad_page, PAGE_SHARED)); + unsigned long kmap_vstart; + + /* cache the first kmap pte */ + kmap_vstart = __fix_to_virt(FIX_KMAP_BEGIN); + kmap_pte = kmap_get_fixmap_pte(kmap_vstart); + + kmap_prot = PAGE_KERNEL; + if (boot_cpu_data.x86_capability & X86_FEATURE_PGE) + pgprot_val(kmap_prot) |= _PAGE_GLOBAL; } +#endif void show_mem(void) { - int i,free = 0,total = 0,reserved = 0; + int i,free = 0, total = 0, reserved = 0; int shared = 0, cached = 0; - int bigmem = 0; + int highmem = 0; printk("Mem-info:\n"); show_free_areas(); @@ -163,8 +208,8 @@ void show_mem(void) i = max_mapnr; while (i-- > 0) { total++; - if (PageBIGMEM(mem_map+i)) - bigmem++; + if (PageHighMem(mem_map+i)) + highmem++; if (PageReserved(mem_map+i)) reserved++; else if (PageSwapCache(mem_map+i)) @@ -174,8 +219,8 @@ void show_mem(void) else shared += page_count(mem_map+i) - 1; } - printk("%d pages of RAM\n",total); - printk("%d pages of BIGMEM\n",bigmem); + printk("%d pages of RAM\n", total); + printk("%d pages of HIGHMEM\n",highmem); printk("%d reserved pages\n",reserved); printk("%d pages shared\n",shared); printk("%d pages swap cached\n",cached); @@ -186,48 +231,30 @@ void show_mem(void) #endif } -extern unsigned long free_area_init(unsigned long, unsigned long); - /* References to section boundaries */ extern char _text, _etext, _edata, __bss_start, _end; extern char __init_begin, __init_end; -/* - * allocate page table(s) for compile-time fixed mappings - */ -static unsigned long __init fixmap_init(unsigned long start_mem) -{ 
- pgd_t * pg_dir; - unsigned int idx; - unsigned long address; - - start_mem = PAGE_ALIGN(start_mem); - - for (idx=1; idx <= __end_of_fixed_addresses; idx += PTRS_PER_PTE) - { - address = __fix_to_virt(__end_of_fixed_addresses-idx); - pg_dir = swapper_pg_dir + (address >> PGDIR_SHIFT); - memset((void *)start_mem, 0, PAGE_SIZE); - pgd_val(*pg_dir) = _PAGE_TABLE | __pa(start_mem); - start_mem += PAGE_SIZE; - } - - return start_mem; -} - static void set_pte_phys (unsigned long vaddr, unsigned long phys) { pgprot_t prot; - pte_t * pte; + pgd_t *pgd; + pmd_t *pmd; + pte_t *pte; - pte = pte_offset(pmd_offset(pgd_offset_k(vaddr), vaddr), vaddr); + pgd = swapper_pg_dir + __pgd_offset(vaddr); + pmd = pmd_offset(pgd, vaddr); + pte = pte_offset(pmd, vaddr); prot = PAGE_KERNEL; if (boot_cpu_data.x86_capability & X86_FEATURE_PGE) pgprot_val(prot) |= _PAGE_GLOBAL; set_pte(pte, mk_pte_phys(phys, prot)); - local_flush_tlb(); + /* + * It's enough to flush this one mapping. + */ + __flush_tlb_one(vaddr); } void set_fixmap (enum fixed_addresses idx, unsigned long phys) @@ -241,6 +268,123 @@ void set_fixmap (enum fixed_addresses idx, unsigned long phys) set_pte_phys (address,phys); } +static void __init pagetable_init(void) +{ + pgd_t *pgd, *pgd_base; + pmd_t *pmd; + pte_t *pte; + int i, j, k; + unsigned long vaddr; + unsigned long end = (unsigned long)__va(max_low_pfn*PAGE_SIZE); + + pgd_base = swapper_pg_dir; + + vaddr = PAGE_OFFSET; + i = __pgd_offset(vaddr); + pgd = pgd_base + i; + + for (; (i < PTRS_PER_PGD) && (vaddr <= end); pgd++, i++) { + vaddr = i*PGDIR_SIZE; +#if CONFIG_X86_PAE + pmd = (pmd_t *) alloc_bootmem_pages(PAGE_SIZE); + memset((void*)pmd, 0, PAGE_SIZE); + pgd_val(*pgd) = __pa(pmd) + 0x1; +#else + pmd = (pmd_t *)pgd; +#endif + if (pmd != pmd_offset(pgd, 0)) + BUG(); + for (j = 0; (j < PTRS_PER_PMD) && (vaddr <= end); pmd++, j++) { + vaddr = i*PGDIR_SIZE + j*PMD_SIZE; + if (cpu_has_pse) { + unsigned long __pe; + + set_in_cr4(X86_CR4_PSE); + boot_cpu_data.wp_works_ok 
= 1; + __pe = _KERNPG_TABLE + _PAGE_PSE + __pa(vaddr); + /* Make it "global" too if supported */ + if (cpu_has_pge) { + set_in_cr4(X86_CR4_PGE); + __pe += _PAGE_GLOBAL; + } + pmd_val(*pmd) = __pe; + continue; + } + + pte = (pte_t *) alloc_bootmem_pages(PAGE_SIZE); + memset((void*)pte, 0, PAGE_SIZE); + pmd_val(*pmd) = _KERNPG_TABLE + __pa(pte); + + if (pte != pte_offset(pmd, 0)) + BUG(); + + for (k = 0; + (k < PTRS_PER_PTE) && (vaddr <= end); + pte++, k++) { + vaddr = i*PGDIR_SIZE + j*PMD_SIZE + k*PAGE_SIZE; + *pte = mk_pte_phys(__pa(vaddr), PAGE_KERNEL); + } + } + } + + /* + * Fixed mappings, only the page table structure has to be + * created - mappings will be set by set_fixmap(): + */ + + vaddr = __fix_to_virt(__end_of_fixed_addresses - 1) & PMD_MASK; + i = __pgd_offset(vaddr); + j = __pmd_offset(vaddr); + pgd = pgd_base + i; + + for ( ; (i < PTRS_PER_PGD) && vaddr; pgd++, i++) { +#if CONFIG_X86_PAE + if (pgd_none(*pgd)) { + pmd = (pmd_t *) alloc_bootmem_pages(PAGE_SIZE); + memset((void*)pmd, 0, PAGE_SIZE); + pgd_val(*pgd) = __pa(pmd) + 0x1; + if (pmd != pmd_offset(pgd, vaddr)) + BUG(); + } + pmd = pmd_offset(pgd, vaddr); +#else + pmd = (pmd_t *)pgd; +#endif + for (; (j < PTRS_PER_PMD) && vaddr; pmd++, j++) { + if (pmd_none(*pmd)) { + pte = (pte_t *) alloc_bootmem_pages(PAGE_SIZE); + memset((void*)pte, 0, PAGE_SIZE); + pmd_val(*pmd) = _KERNPG_TABLE + __pa(pte); + if (pte != pte_offset(pmd, 0)) + BUG(); + } + vaddr += PMD_SIZE; + } + j = 0; + } + +#if CONFIG_X86_PAE + /* + * Add low memory identity-mappings - SMP needs it when + * starting up on an AP from real-mode. In the non-PAE + * case we already have these mappings through head.S. + * All user-space mappings are explicitly cleared after + * SMP startup. 
+ */ + pgd_base[0] = pgd_base[USER_PTRS_PER_PGD]; +#endif +} + +void __init zap_low_mappings (void) +{ + int i; + /* + * Zap initial low-memory mappings: + */ + for (i = 0; i < USER_PTRS_PER_PGD; i++) + pgd_clear(swapper_pg_dir + i); +} + /* * paging_init() sets up the page tables - note that the first 4MB are * already mapped by head.S. @@ -248,89 +392,36 @@ void set_fixmap (enum fixed_addresses idx, unsigned long phys) * This routines also unmaps the page at virtual kernel address 0, so * that we can trap those pesky NULL-reference errors in the kernel. */ -unsigned long __init paging_init(unsigned long start_mem, unsigned long end_mem) +void __init paging_init(void) { - pgd_t * pg_dir; - pte_t * pg_table; - unsigned long tmp; - unsigned long address; + pagetable_init(); -/* - * Physical page 0 is special; it's not touched by Linux since BIOS - * and SMM (for laptops with [34]86/SL chips) may need it. It is read - * and write protected to detect null pointer references in the - * kernel. - * It may also hold the MP configuration table when we are booting SMP. - */ - start_mem = PAGE_ALIGN(start_mem); - address = PAGE_OFFSET; - pg_dir = swapper_pg_dir; - /* unmap the original low memory mappings */ - pgd_val(pg_dir[0]) = 0; - - /* Map whole memory from PAGE_OFFSET */ - pg_dir += USER_PGD_PTRS; - while (address < end_mem) { - /* - * If we're running on a Pentium CPU, we can use the 4MB - * page tables. - * - * The page tables we create span up to the next 4MB - * virtual memory boundary, but that's OK as we won't - * use that memory anyway. 
- */ - if (boot_cpu_data.x86_capability & X86_FEATURE_PSE) { - unsigned long __pe; - - set_in_cr4(X86_CR4_PSE); - boot_cpu_data.wp_works_ok = 1; - __pe = _KERNPG_TABLE + _PAGE_4M + __pa(address); - /* Make it "global" too if supported */ - if (boot_cpu_data.x86_capability & X86_FEATURE_PGE) { - set_in_cr4(X86_CR4_PGE); - __pe += _PAGE_GLOBAL; - } - pgd_val(*pg_dir) = __pe; - pg_dir++; - address += 4*1024*1024; - continue; - } + __asm__( "movl %%ecx,%%cr3\n" ::"c"(__pa(swapper_pg_dir))); - /* - * We're on a [34]86, use normal page tables. - * pg_table is physical at this point - */ - pg_table = (pte_t *) (PAGE_MASK & pgd_val(*pg_dir)); - if (!pg_table) { - pg_table = (pte_t *) __pa(start_mem); - start_mem += PAGE_SIZE; - } +#if CONFIG_X86_PAE + /* + * We will bail out later - printk doesnt work right now so + * the user would just see a hanging kernel. + */ + if (cpu_has_pae) + set_in_cr4(X86_CR4_PAE); +#endif + + __flush_tlb(); - pgd_val(*pg_dir) = _PAGE_TABLE | (unsigned long) pg_table; - pg_dir++; - - /* now change pg_table to kernel virtual addresses */ - pg_table = (pte_t *) __va(pg_table); - for (tmp = 0 ; tmp < PTRS_PER_PTE ; tmp++,pg_table++) { - pte_t pte = mk_pte(address, PAGE_KERNEL); - if (address >= end_mem) - pte_val(pte) = 0; - set_pte(pg_table, pte); - address += PAGE_SIZE; - } - } - start_mem = fixmap_init(start_mem); #ifdef __SMP__ - start_mem = init_smp_mappings(start_mem); + init_smp_mappings(); #endif - local_flush_tlb(); -#ifndef CONFIG_BIGMEM - return free_area_init(start_mem, end_mem); -#else +#ifdef CONFIG_HIGHMEM kmap_init(); /* run after fixmap_init */ - return free_area_init(start_mem, bigmem_end + PAGE_OFFSET); #endif +#ifdef CONFIG_HIGHMEM + free_area_init(highend_pfn); +#else + free_area_init(max_low_pfn); +#endif + return; } /* @@ -341,23 +432,38 @@ unsigned long __init paging_init(unsigned long start_mem, unsigned long end_mem) void __init test_wp_bit(void) { - unsigned char tmp_reg; - unsigned long old = pg0[0]; +/* + * Ok, all 
PAE-capable CPUs are definitely handling the WP bit right. + */ +//#ifndef CONFIG_X86_PAE + const unsigned long vaddr = PAGE_OFFSET; + pgd_t *pgd; + pmd_t *pmd; + pte_t *pte, old_pte; + char tmp_reg; printk("Checking if this processor honours the WP bit even in supervisor mode... "); - pg0[0] = pte_val(mk_pte(PAGE_OFFSET, PAGE_READONLY)); + + pgd = swapper_pg_dir + __pgd_offset(vaddr); + pmd = pmd_offset(pgd, vaddr); + pte = pte_offset(pmd, vaddr); + old_pte = *pte; + *pte = mk_pte_phys(0, PAGE_READONLY); local_flush_tlb(); + __asm__ __volatile__( "jmp 1f; 1:\n" "movb %0,%1\n" "movb %1,%0\n" "jmp 1f; 1:\n" - :"=m" (*(char *) __va(0)), + :"=m" (*(char *) vaddr), "=q" (tmp_reg) :/* no inputs */ :"memory"); - pg0[0] = old; + + *pte = old_pte; local_flush_tlb(); + if (boot_cpu_data.wp_works_ok < 0) { boot_cpu_data.wp_works_ok = 0; printk("No.\n"); @@ -366,136 +472,95 @@ void __init test_wp_bit(void) #endif } else printk(".\n"); +//#endif } -static void __init mem_init_region(unsigned long pfn, unsigned long count, unsigned long start_mem_pfn) +static inline int page_is_ram (unsigned long pagenr) { - printk("memory region: %luk @ %08lx000\n", count << 2, pfn); + int i; - do { - if (pfn >= max_mapnr) - break; + for (i = 0; i < e820.nr_map; i++) { + unsigned long addr, size; - /* Avoid the kernel mapping between HIGH_MEMORY and "start_mem".. 
*/ - if (pfn < (HIGH_MEMORY >> PAGE_SHIFT) || pfn >= start_mem_pfn) - clear_bit(PG_reserved, &mem_map[pfn].flags); - - pfn++; - } while (--count > 0); + if (e820.map[i].type != E820_RAM) /* not usable memory */ + continue; + addr = (e820.map[i].addr+PAGE_SIZE-1) >> PAGE_SHIFT; + size = e820.map[i].size >> PAGE_SHIFT; + if ((pagenr >= addr) && (pagenr < addr+size)) + return 1; + } + return 0; } -void __init mem_init(unsigned long start_mem, unsigned long end_mem) +void __init mem_init(void) { - unsigned long start_low_mem = PAGE_SIZE; int codepages = 0; int reservedpages = 0; int datapages = 0; int initpages = 0; - unsigned long tmp; - int i, avail; - - end_mem &= PAGE_MASK; -#ifdef CONFIG_BIGMEM - bigmem_start = PAGE_ALIGN(bigmem_start); - bigmem_end &= PAGE_MASK; -#endif - high_memory = (void *) end_mem; -#ifndef CONFIG_BIGMEM - max_mapnr = num_physpages = MAP_NR(end_mem); +#ifdef CONFIG_HIGHMEM + int tmp; + + if (!mem_map) + BUG(); + highmem_start_page = mem_map + highstart_pfn; + /* cache the highmem_mapnr */ + highmem_mapnr = highstart_pfn; + max_mapnr = num_physpages = highend_pfn; #else - max_mapnr = num_physpages = PHYSMAP_NR(bigmem_end); - /* cache the bigmem_mapnr */ - bigmem_mapnr = PHYSMAP_NR(bigmem_start); + max_mapnr = num_physpages = max_low_pfn; #endif + high_memory = (void *) __va(max_low_pfn * PAGE_SIZE); /* clear the zero-page */ memset(empty_zero_page, 0, PAGE_SIZE); - /* mark usable pages in the mem_map[] */ - start_low_mem = PAGE_ALIGN(start_low_mem)+PAGE_OFFSET; + /* this will put all low memory onto the freelists */ + totalram_pages += free_all_bootmem(); -#ifdef __SMP__ - /* - * But first pinch a few for the stack/trampoline stuff - * FIXME: Don't need the extra page at 4K, but need to fix - * trampoline before removing it. 
(see the GDT stuff) - * - */ - start_low_mem += PAGE_SIZE; /* 32bit startup code */ - start_low_mem = smp_alloc_memory(start_low_mem); /* AP processor stacks */ -#endif - start_mem = PAGE_ALIGN(start_mem); +#ifdef CONFIG_HIGHMEM + for (tmp = highstart_pfn; tmp < highend_pfn; tmp++) { + struct page *page = mem_map + tmp; - /* walk the whitelist, unreserving good memory - */ - for (avail = i = 0; i < e820.nr_map; i++) { - unsigned long start_pfn, end_pfn; - - if (e820.map[i].type != E820_RAM) /* not usable memory */ - continue; - - start_pfn = (e820.map[i].addr + PAGE_SIZE - 1) >> PAGE_SHIFT; - end_pfn = (e820.map[i].addr + e820.map[i].size) >> PAGE_SHIFT; - - /* We have a certain amount of low memory reserved */ - if (start_pfn < MAP_NR(start_low_mem)) - start_pfn = MAP_NR(start_low_mem); - - if (end_pfn <= start_pfn) - continue; - - mem_init_region(start_pfn, end_pfn - start_pfn, MAP_NR(start_mem)); - } - - for (tmp = PAGE_OFFSET ; tmp < end_mem ; tmp += PAGE_SIZE) { - if (tmp >= MAX_DMA_ADDRESS) - clear_bit(PG_DMA, &mem_map[MAP_NR(tmp)].flags); - if (PageReserved(mem_map+MAP_NR(tmp))) { - if (tmp >= (unsigned long) &_text && tmp < (unsigned long) &_edata) { - if (tmp < (unsigned long) &_etext) - codepages++; - else - datapages++; - } else if (tmp >= (unsigned long) &__init_begin - && tmp < (unsigned long) &__init_end) - initpages++; - else if (tmp >= (unsigned long) &__bss_start - && tmp < (unsigned long) start_mem) - datapages++; - else - reservedpages++; + if (!page_is_ram(tmp)) { + SetPageReserved(page); continue; } - set_page_count(mem_map+MAP_NR(tmp), 1); - totalram += PAGE_SIZE; -#ifdef CONFIG_BLK_DEV_INITRD - if (!initrd_start || (tmp < initrd_start || tmp >= initrd_end)) -#endif - free_page(tmp); + ClearPageReserved(page); + set_bit(PG_highmem, &page->flags); + atomic_set(&page->count, 1); + __free_page(page); + totalhigh_pages++; } -#ifdef CONFIG_BIGMEM - for (tmp = bigmem_start; tmp < bigmem_end; tmp += PAGE_SIZE) { - clear_bit(PG_reserved, 
&mem_map[PHYSMAP_NR(tmp)].flags); - set_bit(PG_BIGMEM, &mem_map[PHYSMAP_NR(tmp)].flags); - atomic_set(&mem_map[PHYSMAP_NR(tmp)].count, 1); - free_page(tmp + PAGE_OFFSET); - totalbig += PAGE_SIZE; - } - totalram += totalbig; + totalram_pages += totalhigh_pages; #endif - printk("Memory: %luk/%luk available (%dk kernel code, %dk reserved, %dk data, %dk init, %dk bigmem)\n", + printk("Memory: %luk/%luk available (%dk kernel code, %dk reserved, %dk data, %dk init, %ldk highmem)\n", (unsigned long) nr_free_pages << (PAGE_SHIFT-10), max_mapnr << (PAGE_SHIFT-10), codepages << (PAGE_SHIFT-10), reservedpages << (PAGE_SHIFT-10), datapages << (PAGE_SHIFT-10), initpages << (PAGE_SHIFT-10), - (int) (totalbig >> 10) + (unsigned long) (totalhigh_pages << (PAGE_SHIFT-10)) ); +#if CONFIG_X86_PAE + if (!cpu_has_pae) + panic("cannot execute a PAE-enabled kernel on a PAE-incapable CPU!"); +#endif if (boot_cpu_data.wp_works_ok < 0) test_wp_bit(); + /* + * Subtle. SMP is doing it's boot stuff late (because it has to + * fork idle threads) - but it also needs low mappings for the + * protected-mode entry to work. We zap these entries only after + * the WP-bit has been tested. 
+ */ +#ifndef CONFIG_SMP + zap_low_mappings(); +#endif + } void free_initmem(void) @@ -504,21 +569,22 @@ void free_initmem(void) addr = (unsigned long)(&__init_begin); for (; addr < (unsigned long)(&__init_end); addr += PAGE_SIZE) { - mem_map[MAP_NR(addr)].flags &= ~(1 << PG_reserved); + ClearPageReserved(mem_map + MAP_NR(addr)); set_page_count(mem_map+MAP_NR(addr), 1); free_page(addr); - totalram += PAGE_SIZE; + totalram_pages++; } printk ("Freeing unused kernel memory: %dk freed\n", (&__init_end - &__init_begin) >> 10); } void si_meminfo(struct sysinfo *val) { - val->totalram = totalram; + val->totalram = totalram_pages; val->sharedram = 0; - val->freeram = nr_free_pages << PAGE_SHIFT; - val->bufferram = atomic_read(&buffermem); - val->totalbig = totalbig; - val->freebig = nr_free_bigpages << PAGE_SHIFT; + val->freeram = nr_free_pages; + val->bufferram = atomic_read(&buffermem_pages); + val->totalhigh = totalhigh_pages; + val->freehigh = nr_free_highpages; + val->mem_unit = PAGE_SIZE; return; } diff --git a/arch/i386/mm/ioremap.c b/arch/i386/mm/ioremap.c index 32f3c33fd476..d694553100bc 100644 --- a/arch/i386/mm/ioremap.c +++ b/arch/i386/mm/ioremap.c @@ -20,15 +20,19 @@ static inline void remap_area_pte(pte_t * pte, unsigned long address, unsigned l end = address + size; if (end > PMD_SIZE) end = PMD_SIZE; + if (address >= end) + BUG(); do { - if (!pte_none(*pte)) + if (!pte_none(*pte)) { printk("remap_area_pte: page already exists\n"); + BUG(); + } set_pte(pte, mk_pte_phys(phys_addr, __pgprot(_PAGE_PRESENT | _PAGE_RW | _PAGE_DIRTY | _PAGE_ACCESSED | flags))); address += PAGE_SIZE; phys_addr += PAGE_SIZE; pte++; - } while (address < end); + } while (address && (address < end)); } static inline int remap_area_pmd(pmd_t * pmd, unsigned long address, unsigned long size, @@ -41,6 +45,8 @@ static inline int remap_area_pmd(pmd_t * pmd, unsigned long address, unsigned lo if (end > PGDIR_SIZE) end = PGDIR_SIZE; phys_addr -= address; + if (address >= end) + BUG(); do { 
pte_t * pte = pte_alloc_kernel(pmd, address); if (!pte) @@ -48,7 +54,7 @@ static inline int remap_area_pmd(pmd_t * pmd, unsigned long address, unsigned lo remap_area_pte(pte, address, end - address, address + phys_addr, flags); address = (address + PMD_SIZE) & PMD_MASK; pmd++; - } while (address < end); + } while (address && (address < end)); return 0; } @@ -61,8 +67,11 @@ static int remap_area_pages(unsigned long address, unsigned long phys_addr, phys_addr -= address; dir = pgd_offset(&init_mm, address); flush_cache_all(); - while (address < end) { - pmd_t *pmd = pmd_alloc_kernel(dir, address); + if (address >= end) + BUG(); + do { + pmd_t *pmd; + pmd = pmd_alloc_kernel(dir, address); if (!pmd) return -ENOMEM; if (remap_area_pmd(pmd, address, end - address, @@ -71,7 +80,7 @@ static int remap_area_pages(unsigned long address, unsigned long phys_addr, set_pgdir(address, *dir); address = (address + PGDIR_SIZE) & PGDIR_MASK; dir++; - } + } while (address && (address < end)); flush_tlb_all(); return 0; } diff --git a/drivers/block/ide-dma.c b/drivers/block/ide-dma.c index b1d0979c3ba9..338d51c9a951 100644 --- a/drivers/block/ide-dma.c +++ b/drivers/block/ide-dma.c @@ -461,7 +461,7 @@ int ide_dmaproc (ide_dma_action_t func, ide_drive_t *drive) int ide_release_dma (ide_hwif_t *hwif) { if (hwif->dmatable) { - clear_page((unsigned long)hwif->dmatable); /* clear PRD 1st */ + clear_page((void *)hwif->dmatable); /* clear PRD 1st */ free_page((unsigned long)hwif->dmatable); /* free PRD 2nd */ } if ((hwif->dma_extra) && (hwif->channel == 0)) diff --git a/drivers/block/ide.c b/drivers/block/ide.c index 1a506e3536eb..86b6c022f55a 100644 --- a/drivers/block/ide.c +++ b/drivers/block/ide.c @@ -923,6 +923,7 @@ void ide_error (ide_drive_t *drive, const char *msg, byte stat) */ void ide_cmd(ide_drive_t *drive, byte cmd, byte nsect, ide_handler_t *handler) { + drive->timeout = WAIT_CMD; ide_set_handler (drive, handler); if (IDE_CONTROL_REG) OUT_BYTE(drive->ctl,IDE_CONTROL_REG); /* 
clear nIEN */ diff --git a/drivers/char/console.c b/drivers/char/console.c index f452237ab979..49512fe42e21 100644 --- a/drivers/char/console.c +++ b/drivers/char/console.c @@ -94,6 +94,7 @@ #ifdef CONFIG_APM #include #endif +#include #include #include @@ -2286,7 +2287,7 @@ static void vc_init(unsigned int currcons, unsigned int rows, unsigned int cols, struct tty_driver console_driver; static int console_refcount; -unsigned long __init con_init(unsigned long kmem_start) +void __init con_init(void) { const char *display_desc = NULL; unsigned int currcons = 0; @@ -2295,7 +2296,7 @@ unsigned long __init con_init(unsigned long kmem_start) display_desc = conswitchp->con_startup(); if (!display_desc) { fg_console = 0; - return kmem_start; + return; } memset(&console_driver, 0, sizeof(struct tty_driver)); @@ -2336,19 +2337,18 @@ unsigned long __init con_init(unsigned long kmem_start) timer_active |= 1<con_save_screen); @@ -2376,8 +2376,6 @@ unsigned long __init con_init(unsigned long kmem_start) #endif init_bh(CONSOLE_BH, console_bh); - - return kmem_start; } #ifndef VT_SINGLE_DRIVER diff --git a/drivers/char/n_tty.c b/drivers/char/n_tty.c index 9027aa67ee01..0da69c55c8f9 100644 --- a/drivers/char/n_tty.c +++ b/drivers/char/n_tty.c @@ -811,7 +811,7 @@ static int n_tty_open(struct tty_struct *tty) if (!tty->read_buf) { tty->read_buf = (unsigned char *) - get_free_page(in_interrupt() ? GFP_ATOMIC : GFP_KERNEL); + get_zeroed_page(in_interrupt() ? 
GFP_ATOMIC : GFP_KERNEL); if (!tty->read_buf) return -ENOMEM; } diff --git a/drivers/char/serial.c b/drivers/char/serial.c index 44fd4fe8f1f3..89c067688012 100644 --- a/drivers/char/serial.c +++ b/drivers/char/serial.c @@ -1127,7 +1127,7 @@ static int startup(struct async_struct * info) unsigned short ICP; #endif - page = get_free_page(GFP_KERNEL); + page = get_zeroed_page(GFP_KERNEL); if (!page) return -ENOMEM; @@ -2974,7 +2974,7 @@ static int rs_open(struct tty_struct *tty, struct file * filp) #endif if (!tmp_buf) { - page = get_free_page(GFP_KERNEL); + page = get_zeroed_page(GFP_KERNEL); if (!page) { return -ENOMEM; } @@ -4359,10 +4359,9 @@ static struct console sercons = { /* * Register console. */ -long __init serial_console_init(long kmem_start, long kmem_end) +void __init serial_console_init(void) { register_console(&sercons); - return kmem_start; } #endif diff --git a/drivers/char/tty_io.c b/drivers/char/tty_io.c index 17711734f336..f78171281801 100644 --- a/drivers/char/tty_io.c +++ b/drivers/char/tty_io.c @@ -129,7 +129,7 @@ static int tty_fasync(int fd, struct file * filp, int on); extern int sx_init (void); #endif #ifdef CONFIG_8xx -extern long console_8xx_init(long, long); +extern console_8xx_init(void); extern int rs_8xx_init(void); #endif /* CONFIG_8xx */ @@ -798,7 +798,7 @@ static int init_dev(kdev_t device, struct tty_struct **ret_tty) tp = o_tp = NULL; ltp = o_ltp = NULL; - tty = (struct tty_struct*) get_free_page(GFP_KERNEL); + tty = (struct tty_struct*) get_zeroed_page(GFP_KERNEL); if(!tty) goto fail_no_mem; initialize_tty_struct(tty); @@ -824,7 +824,7 @@ static int init_dev(kdev_t device, struct tty_struct **ret_tty) } if (driver->type == TTY_DRIVER_TYPE_PTY) { - o_tty = (struct tty_struct *) get_free_page(GFP_KERNEL); + o_tty = (struct tty_struct *) get_zeroed_page(GFP_KERNEL); if (!o_tty) goto free_mem_out; initialize_tty_struct(o_tty); @@ -2062,7 +2062,7 @@ int tty_unregister_driver(struct tty_driver *driver) * Just do some early 
initializations, and do the complex setup * later. */ -long __init console_init(long kmem_start, long kmem_end) +void __init console_init(void) { /* Setup the default TTY line discipline. */ memset(ldiscs, 0, sizeof(ldiscs)); @@ -2085,16 +2085,15 @@ long __init console_init(long kmem_start, long kmem_end) * inform about problems etc.. */ #ifdef CONFIG_VT - kmem_start = con_init(kmem_start); + con_init(); #endif #ifdef CONFIG_SERIAL_CONSOLE #ifdef CONFIG_8xx - kmem_start = console_8xx_init(kmem_start, kmem_end); + console_8xx_init(); #else - kmem_start = serial_console_init(kmem_start, kmem_end); + serial_console_init(); #endif /* CONFIG_8xx */ #endif - return kmem_start; } static struct tty_driver dev_tty_driver, dev_syscons_driver; @@ -2109,7 +2108,7 @@ static struct tty_driver dev_console_driver; * Ok, now we can initialize the rest of the tty devices and can count * on memory allocations, interrupts etc.. */ -int __init tty_init(void) +void __init tty_init(void) { if (sizeof(struct tty_struct) > PAGE_SIZE) panic("size of tty structure > PAGE_SIZE!"); @@ -2220,5 +2219,4 @@ int __init tty_init(void) #ifdef CONFIG_VT vcs_init(); #endif - return 0; } diff --git a/drivers/net/eepro100.c b/drivers/net/eepro100.c index e1a4a3c91784..aac698b0d417 100644 --- a/drivers/net/eepro100.c +++ b/drivers/net/eepro100.c @@ -1495,7 +1495,7 @@ speedo_rx(struct net_device *dev) rxf = sp->rx_ringp[entry] = (struct RxFD *)skb->tail; skb->dev = dev; skb_reserve(skb, sizeof(struct RxFD)); - rxf->rx_buf_addr = virt_to_le32bus(skb->tail); + rxf->rx_buf_addr = virt_to_bus(skb->tail); } else { rxf = sp->rx_ringp[entry]; } diff --git a/drivers/net/starfire.c b/drivers/net/starfire.c index 09891202cdd8..b036deee4c35 100644 --- a/drivers/net/starfire.c +++ b/drivers/net/starfire.c @@ -81,6 +81,7 @@ static int full_duplex[MAX_UNITS] = {-1, -1, -1, -1, -1, -1, -1, -1}; #endif #include +#include #include #include #include diff --git a/drivers/net/via-rhine.c b/drivers/net/via-rhine.c index 
21275376c1cf..a05f54bb337a 100644 --- a/drivers/net/via-rhine.c +++ b/drivers/net/via-rhine.c @@ -111,11 +111,6 @@ static const int multicast_filter_limit = 32; #ifdef MODULE char kernel_version[] = UTS_RELEASE; -#else -#ifndef __alpha__ -#define ioremap vremap -#define iounmap vfree -#endif #endif #if defined(MODULE) && LINUX_VERSION_CODE > 0x20115 MODULE_AUTHOR("Donald Becker "); diff --git a/drivers/usb/printer.c b/drivers/usb/printer.c index dffd765590bd..8bc2cfc1c90c 100644 --- a/drivers/usb/printer.c +++ b/drivers/usb/printer.c @@ -25,37 +25,47 @@ #define NAK_TIMEOUT (HZ) /* stall wait for printer */ #define MAX_RETRY_COUNT ((60*60*HZ)/NAK_TIMEOUT) /* should not take 1 minute a page! */ +#define BIG_BUF_SIZE 8192 + +/* + * USB Printer Requests + */ +#define USB_PRINTER_REQ_GET_DEVICE_ID 0 +#define USB_PRINTER_REQ_GET_PORT_STATUS 1 +#define USB_PRINTER_REQ_SOFT_RESET 2 + #define MAX_PRINTERS 8 struct pp_usb_data { struct usb_device *pusb_dev; - __u8 isopen; /* nz if open */ - __u8 noinput; /* nz if no input stream */ + __u8 isopen; /* True if open */ + __u8 noinput; /* True if no input stream */ __u8 minor; /* minor number of device */ __u8 status; /* last status from device */ int maxin, maxout; /* max transfer size in and out */ char *obuf; /* transfer buffer (out only) */ wait_queue_head_t wait_q; /* for timeouts */ unsigned int last_error; /* save for checking */ + int bulk_in_ep; /* Bulk IN endpoint */ + int bulk_out_ep; /* Bulk OUT endpoint */ + int bulk_in_index; /* endpoint[bulk_in_index] */ + int bulk_out_index; /* endpoint[bulk_out_index] */ }; static struct pp_usb_data *minor_data[MAX_PRINTERS]; #define PPDATA(x) ((struct pp_usb_data *)(x)) -unsigned char printer_read_status(struct pp_usb_data *p) +static unsigned char printer_read_status(struct pp_usb_data *p) { __u8 status; - devrequest dr; struct usb_device *dev = p->pusb_dev; - dr.requesttype = USB_TYPE_CLASS | USB_RT_INTERFACE | 0x80; - dr.request = 1; - dr.value = 0; - dr.index = 0; - 
dr.length = 1; - if (dev->bus->op->control_msg(dev, usb_rcvctrlpipe(dev,0), &dr, &status, 1, HZ)) { - return 0; + if (usb_control_msg(dev, usb_rcvctrlpipe(dev,0), + USB_PRINTER_REQ_GET_PORT_STATUS, + USB_TYPE_CLASS | USB_RT_INTERFACE | USB_DIR_IN, + 0, 0, &status, 1, HZ)) { + return 0; } return status; } @@ -90,24 +100,21 @@ static int printer_check_status(struct pp_usb_data *p) return status; } -void printer_reset(struct pp_usb_data *p) +static void printer_reset(struct pp_usb_data *p) { - devrequest dr; struct usb_device *dev = p->pusb_dev; - dr.requesttype = USB_TYPE_CLASS | USB_RECIP_OTHER; - dr.request = 2; - dr.value = 0; - dr.index = 0; - dr.length = 0; - dev->bus->op->control_msg(dev, usb_sndctrlpipe(dev,0), &dr, NULL, 0, HZ); + usb_control_msg(dev, usb_sndctrlpipe(dev,0), + USB_PRINTER_REQ_SOFT_RESET, + USB_TYPE_CLASS | USB_RECIP_OTHER, + 0, 0, NULL, 0, HZ); } static int open_printer(struct inode * inode, struct file * file) { struct pp_usb_data *p; - if(MINOR(inode->i_rdev) >= MAX_PRINTERS || + if (MINOR(inode->i_rdev) >= MAX_PRINTERS || !minor_data[MINOR(inode->i_rdev)]) { return -ENODEV; } @@ -141,7 +148,7 @@ static int close_printer(struct inode * inode, struct file * file) p->isopen = 0; file->private_data = NULL; /* free the resources if the printer is no longer around */ - if(!p->pusb_dev) { + if (!p->pusb_dev) { minor_data[p->minor] = NULL; kfree(p); } @@ -158,12 +165,7 @@ static ssize_t write_printer(struct file * file, unsigned long partial; int result = USB_ST_NOERROR; int maxretry; - int endpoint_num; - struct usb_interface_descriptor *interface; - interface = p->pusb_dev->config->interface->altsetting; - endpoint_num = (interface->endpoint[1].bEndpointAddress & 0x0f); - do { char *obuf = p->obuf; unsigned long thistime; @@ -179,7 +181,7 @@ static ssize_t write_printer(struct file * file, return bytes_written ? 
bytes_written : -EINTR; } result = p->pusb_dev->bus->op->bulk_msg(p->pusb_dev, - usb_sndbulkpipe(p->pusb_dev, endpoint_num), + usb_sndbulkpipe(p->pusb_dev, p->bulk_out_ep), obuf, thistime, &partial, HZ*20); if (partial) { obuf += partial; @@ -187,7 +189,7 @@ static ssize_t write_printer(struct file * file, maxretry = MAX_RETRY_COUNT; } if (result == USB_ST_TIMEOUT) { /* NAK - so hold for a while */ - if(!maxretry--) + if (!maxretry--) return -ETIME; interruptible_sleep_on_timeout(&p->wait_q, NAK_TIMEOUT); continue; @@ -214,21 +216,15 @@ static ssize_t read_printer(struct file * file, char * buffer, size_t count, loff_t *ppos) { struct pp_usb_data *p = file->private_data; - int read_count; + int read_count = 0; int this_read; char buf[64]; unsigned long partial; int result; - int endpoint_num; - struct usb_interface_descriptor *interface; - interface = p->pusb_dev->config->interface->altsetting; - endpoint_num = (interface->endpoint[0].bEndpointAddress & 0x0f); - if (p->noinput) return -EINVAL; - read_count = 0; while (count) { if (signal_pending(current)) { return read_count ? read_count : -EINTR; @@ -238,7 +234,7 @@ static ssize_t read_printer(struct file * file, this_read = (count > sizeof(buf)) ? 
sizeof(buf) : count; result = p->pusb_dev->bus->op->bulk_msg(p->pusb_dev, - usb_rcvbulkpipe(p->pusb_dev, endpoint_num), + usb_rcvbulkpipe(p->pusb_dev, p->bulk_in_ep), buf, this_read, &partial, HZ*20); /* unlike writes, we don't retry a NAK, just stop now */ @@ -266,8 +262,8 @@ static int printer_probe(struct usb_device *dev) /* * FIXME - this will not cope with combined printer/scanners */ - if ((dev->descriptor.bDeviceClass != 7 && - dev->descriptor.bDeviceClass != 0) || + if ((dev->descriptor.bDeviceClass != USB_CLASS_PRINTER && + dev->descriptor.bDeviceClass != 0) || dev->descriptor.bNumConfigurations != 1 || dev->config[0].bNumInterfaces != 1) { return -1; @@ -275,34 +271,50 @@ static int printer_probe(struct usb_device *dev) interface = &dev->config[0].interface[0].altsetting[0]; - /* Lets be paranoid (for the moment)*/ - if (interface->bInterfaceClass != 7 || + /* Let's be paranoid (for the moment). */ + if (interface->bInterfaceClass != USB_CLASS_PRINTER || interface->bInterfaceSubClass != 1 || - (interface->bInterfaceProtocol != 2 && interface->bInterfaceProtocol != 1)|| + (interface->bInterfaceProtocol != 2 && interface->bInterfaceProtocol != 1) || interface->bNumEndpoints > 2) { return -1; } - if ((interface->endpoint[0].bEndpointAddress & 0xf0) != 0x00 || - interface->endpoint[0].bmAttributes != 0x02 || - (interface->bNumEndpoints > 1 && ( - (interface->endpoint[1].bEndpointAddress & 0xf0) != 0x80 || - interface->endpoint[1].bmAttributes != 0x02))) { + /* Does this (these) interface(s) support bulk transfers? */ + if ((interface->endpoint[0].bmAttributes & USB_ENDPOINT_XFERTYPE_MASK) + != USB_ENDPOINT_XFER_BULK) { return -1; } + if ((interface->bNumEndpoints > 1) && + ((interface->endpoint[1].bmAttributes & USB_ENDPOINT_XFERTYPE_MASK) + != USB_ENDPOINT_XFER_BULK)) { + return -1; + } + + /* + * Does this interface have at least one OUT endpoint + * that we can write to: endpoint index 0 or 1? 
+ */ + if ((interface->endpoint[0].bEndpointAddress & USB_ENDPOINT_DIR_MASK) + != USB_DIR_OUT && + (interface->bNumEndpoints > 1 && + (interface->endpoint[1].bEndpointAddress & USB_ENDPOINT_DIR_MASK) + != USB_DIR_OUT)) { + return -1; + } for (i=0; i= MAX_PRINTERS) { + printk("No minor table space available for USB Printer\n"); return -1; } printk(KERN_INFO "USB Printer found at address %d\n", dev->devnum); if (!(dev->private = kmalloc(sizeof(struct pp_usb_data), GFP_KERNEL))) { - printk( KERN_DEBUG "usb_printer: no memory!\n"); + printk(KERN_DEBUG "usb_printer: no memory!\n"); return -1; } @@ -310,48 +322,63 @@ static int printer_probe(struct usb_device *dev) minor_data[i] = PPDATA(dev->private); minor_data[i]->minor = i; minor_data[i]->pusb_dev = dev; - /* The max packet size can't be more than 64 (& will be 64 for - * any decent bulk device); this calculation was silly. -greg - * minor_data[i]->maxout = interface->endpoint[0].wMaxPacketSize * 16; - */ - minor_data[i]->maxout = 8192; - if (minor_data[i]->maxout > PAGE_SIZE) { - minor_data[i]->maxout = PAGE_SIZE; - } - if (interface->bInterfaceProtocol != 2) + minor_data[i]->maxout = (BIG_BUF_SIZE > PAGE_SIZE) ? PAGE_SIZE : BIG_BUF_SIZE; + if (interface->bInterfaceProtocol != 2) /* if not bidirectional */ minor_data[i]->noinput = 1; - else { - minor_data[i]->maxin = interface->endpoint[1].wMaxPacketSize; + + minor_data[i]->bulk_out_index = + ((interface->endpoint[0].bEndpointAddress & USB_ENDPOINT_DIR_MASK) + == USB_DIR_OUT) ? 0 : 1; + minor_data[i]->bulk_in_index = minor_data[i]->noinput ? -1 : + (minor_data[i]->bulk_out_index == 0) ? 1 : 0; + minor_data[i]->bulk_in_ep = minor_data[i]->noinput ? 
-1 : + interface->endpoint[minor_data[i]->bulk_in_index].bEndpointAddress & + USB_ENDPOINT_NUMBER_MASK; + minor_data[i]->bulk_out_ep = + interface->endpoint[minor_data[i]->bulk_out_index].bEndpointAddress & + USB_ENDPOINT_NUMBER_MASK; + if (interface->bInterfaceProtocol == 2) { /* if bidirectional */ + minor_data[i]->maxin = + interface->endpoint[minor_data[i]->bulk_in_index].wMaxPacketSize; } if (usb_set_configuration(dev, dev->config[0].bConfigurationValue)) { printk(KERN_INFO " Failed usb_set_configuration: printer\n"); return -1; } + + printk(KERN_INFO "USB Printer Summary:\n"); + printk(KERN_INFO "index=%d, maxout=%d, noinput=%d\n", + i, minor_data[i]->maxout, minor_data[i]->noinput); + printk(KERN_INFO "bulk_in_ix=%d, bulk_in_ep=%d, bulk_out_ix=%d, bulk_out_ep=%d\n", + minor_data[i]->bulk_in_index, + minor_data[i]->bulk_in_ep, + minor_data[i]->bulk_out_index, + minor_data[i]->bulk_out_ep); + #if 0 { __u8 status; __u8 ieee_id[64]; - devrequest dr; - - /* Lets get the device id if possible */ - dr.requesttype = USB_TYPE_CLASS | USB_RT_INTERFACE | 0x80; - dr.request = 0; - dr.value = 0; - dr.index = 0; - dr.length = sizeof(ieee_id) - 1; - if (dev->bus->op->control_msg(dev, usb_rcvctrlpipe(dev,0), &dr, ieee_id, sizeof(ieee_id)-1, HZ) == 0) { + + /* Let's get the device id if possible. */ + if (usb_control_msg(dev, usb_rcvctrlpipe(dev,0), + USB_PRINTER_REQ_GET_DEVICE_ID, + USB_TYPE_CLASS | USB_RT_INTERFACE | USB_DIR_IN, + 0, 0, ieee_id, + sizeof(ieee_id)-1, HZ) == 0) { if (ieee_id[1] < sizeof(ieee_id) - 1) ieee_id[ieee_id[1]+2] = '\0'; else ieee_id[sizeof(ieee_id)-1] = '\0'; - printk(KERN_INFO " Printer ID is %s\n", &ieee_id[2]); + printk(KERN_INFO " USB Printer ID is %s\n", + &ieee_id[2]); } status = printer_read_status(PPDATA(dev->private)); printk(KERN_INFO " Status is %s,%s,%s\n", - (status & 0x10) ? "Selected" : "Not Selected", - (status & 0x20) ? "No Paper" : "Paper", - (status & 0x08) ? "No Error" : "Error"); + (status & LP_PSELECD) ? 
"Selected" : "Not Selected", + (status & LP_POUTPA) ? "No Paper" : "Paper", + (status & LP_PERRORP) ? "No Error" : "Error"); } #endif return 0; @@ -397,7 +424,13 @@ static struct usb_driver printer_driver = { int usb_printer_init(void) { - usb_register(&printer_driver); + if (usb_register(&printer_driver)) { + printk(KERN_ERR "USB Printer driver cannot register: " + "minor number %d already in use\n", + printer_driver.minor); + return 1; + } + printk(KERN_INFO "USB Printer support registered.\n"); return 0; } diff --git a/fs/buffer.c b/fs/buffer.c index f3c3f11b7b1c..c065cfdb2acf 100644 --- a/fs/buffer.c +++ b/fs/buffer.c @@ -97,7 +97,7 @@ static kmem_cache_t *bh_cachep; static int grow_buffers(int size); /* This is used by some architectures to estimate available memory. */ -atomic_t buffermem = ATOMIC_INIT(0); +atomic_t buffermem_pages = ATOMIC_INIT(0); /* Here is the parameter block for the bdflush process. If you add or * remove any of the parameters, make sure to update kernel/sysctl.c. 
@@ -827,7 +827,7 @@ static int balance_dirty_state(kdev_t dev) unsigned long dirty, tot, hard_dirty_limit, soft_dirty_limit; dirty = size_buffers_type[BUF_DIRTY] >> PAGE_SHIFT; - tot = nr_lru_pages + nr_free_pages - nr_free_bigpages; + tot = nr_lru_pages + nr_free_pages + nr_free_highpages; hard_dirty_limit = tot * bdf_prm.b_un.nfract / 100; soft_dirty_limit = hard_dirty_limit >> 1; @@ -1267,7 +1267,7 @@ int block_flushpage(struct inode *inode, struct page *page, unsigned long offset */ if (!offset) { if (!try_to_free_buffers(page)) { - atomic_add(PAGE_CACHE_SIZE, &buffermem); + atomic_inc(&buffermem_pages); return 0; } } @@ -1834,12 +1834,12 @@ int brw_kiovec(int rw, int nr, struct kiobuf *iovec[], dprintk ("iobuf %d %d %d\n", offset, length, size); for (pageind = 0; pageind < iobuf->nr_pages; pageind++) { - page = iobuf->pagelist[pageind]; map = iobuf->maplist[pageind]; - if (map && PageBIGMEM(map)) { + if (map && PageHighMem(map)) { err = -EIO; goto error; } + page = page_address(map); while (length > 0) { blocknr = b[bufind++]; @@ -2115,7 +2115,7 @@ static int grow_buffers(int size) page_map = mem_map + MAP_NR(page); page_map->buffers = bh; lru_cache_add(page_map); - atomic_add(PAGE_SIZE, &buffermem); + atomic_inc(&buffermem_pages); return 1; no_buffer_head: @@ -2208,7 +2208,8 @@ void show_buffers(void) int nlist; static char *buf_types[NR_LIST] = { "CLEAN", "LOCKED", "DIRTY" }; - printk("Buffer memory: %6dkB\n", atomic_read(&buffermem) >> 10); + printk("Buffer memory: %6dkB\n", + atomic_read(&buffermem_pages) << (PAGE_SHIFT-10)); #ifdef __SMP__ /* trylock does nothing on UP and so we could deadlock */ if (!spin_trylock(&lru_list_lock)) @@ -2246,7 +2247,7 @@ void show_buffers(void) * Use gfp() for the hash table to decrease TLB misses, use * SLAB cache for buffer heads. 
*/ -void __init buffer_init(unsigned long memory_size) +void __init buffer_init(unsigned long mempages) { int order, i; unsigned int nr_hash; @@ -2254,9 +2255,11 @@ void __init buffer_init(unsigned long memory_size) /* The buffer cache hash table is less important these days, * trim it a bit. */ - memory_size >>= 14; - memory_size *= sizeof(struct buffer_head *); - for (order = 0; (PAGE_SIZE << order) < memory_size; order++) + mempages >>= 14; + + mempages *= sizeof(struct buffer_head *); + + for (order = 0; (1 << order) < mempages; order++) ; /* try to allocate something until we get it or we're asking diff --git a/fs/dcache.c b/fs/dcache.c index 5f9c066a45e0..b6f7a7203b5b 100644 --- a/fs/dcache.c +++ b/fs/dcache.c @@ -420,7 +420,7 @@ int shrink_dcache_memory(int priority, unsigned int gfp_mask) unlock_kernel(); /* FIXME: kmem_cache_shrink here should tell us the number of pages freed, and it should - work in a __GFP_DMA/__GFP_BIGMEM behaviour + work in a __GFP_DMA/__GFP_HIGHMEM behaviour to free only the interesting pages in function of the needs of the current allocation. */ kmem_cache_shrink(dentry_cache); diff --git a/fs/exec.c b/fs/exec.c index f56d8b3523e9..1961ec33a49a 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -31,6 +31,8 @@ #include #include #include +#include +#include #include #include @@ -212,20 +214,42 @@ int copy_strings(int argc,char ** argv, struct linux_binprm *bprm) /* XXX: add architecture specific overflow check here. */ pos = bprm->p; - while (len>0) { - char *pag; + while (len > 0) { + char *kaddr; + int i, new, err; + struct page *page; int offset, bytes_to_copy; offset = pos % PAGE_SIZE; - if (!(pag = (char *) bprm->page[pos/PAGE_SIZE]) && - !(pag = (char *) bprm->page[pos/PAGE_SIZE] = - (unsigned long *) get_free_page(GFP_USER))) - return -ENOMEM; + i = pos/PAGE_SIZE; + page = bprm->page[i]; + new = 0; + if (!page) { + /* + * Cannot yet use highmem page because + * we cannot sleep with a kmap held. 
+ */ + page = __get_pages(GFP_USER, 0); + bprm->page[i] = page; + if (!page) + return -ENOMEM; + new = 1; + } + kaddr = (char *)kmap(page, KM_WRITE); + if (new && offset) + memset(kaddr, 0, offset); bytes_to_copy = PAGE_SIZE - offset; - if (bytes_to_copy > len) + if (bytes_to_copy > len) { bytes_to_copy = len; - if (copy_from_user(pag + offset, str, bytes_to_copy)) + if (new) + memset(kaddr+offset+len, 0, PAGE_SIZE-offset-len); + } + err = copy_from_user(kaddr + offset, str, bytes_to_copy); + flush_page_to_ram(kaddr); + kunmap((unsigned long)kaddr, KM_WRITE); + + if (err) return -EFAULT; pos += bytes_to_copy; @@ -647,14 +671,22 @@ void remove_arg_zero(struct linux_binprm *bprm) { if (bprm->argc) { unsigned long offset; - char * page; + char * kaddr; + struct page *page; + offset = bprm->p % PAGE_SIZE; - page = (char*)bprm->page[bprm->p/PAGE_SIZE]; - while(bprm->p++,*(page+offset++)) - if(offset==PAGE_SIZE){ - offset=0; - page = (char*)bprm->page[bprm->p/PAGE_SIZE]; - } + goto inside; + + while (bprm->p++, *(kaddr+offset++)) { + if (offset != PAGE_SIZE) + continue; + offset = 0; + kunmap((unsigned long)kaddr, KM_WRITE); +inside: + page = bprm->page[bprm->p/PAGE_SIZE]; + kaddr = (char *)kmap(page, KM_WRITE); + } + kunmap((unsigned long)kaddr, KM_WRITE); bprm->argc--; } } @@ -683,8 +715,8 @@ int search_binary_handler(struct linux_binprm *bprm,struct pt_regs *regs) bprm->dentry = NULL; bprm_loader.p = PAGE_SIZE*MAX_ARG_PAGES-sizeof(void *); - for (i=0 ; ii_wait); INIT_LIST_HEAD(&inode->i_hash); + INIT_LIST_HEAD(&inode->i_pages); INIT_LIST_HEAD(&inode->i_dentry); sema_init(&inode->i_sem, 1); spin_lock_init(&inode->i_shared_lock); @@ -401,7 +402,7 @@ int shrink_icache_memory(int priority, int gfp_mask) prune_icache(count); /* FIXME: kmem_cache_shrink here should tell us the number of pages freed, and it should - work in a __GFP_DMA/__GFP_BIGMEM behaviour + work in a __GFP_DMA/__GFP_HIGHMEM behaviour to free only the interesting pages in function of the needs of the 
current allocation. */ kmem_cache_shrink(inode_cachep); diff --git a/fs/iobuf.c b/fs/iobuf.c index b46a13bfd8eb..eaabf2f7c5c0 100644 --- a/fs/iobuf.c +++ b/fs/iobuf.c @@ -50,7 +50,6 @@ int alloc_kiovec(int nr, struct kiobuf **bufp) init_waitqueue_head(&iobuf->wait_queue); iobuf->end_io = simple_wakeup_kiobuf; iobuf->array_len = KIO_STATIC_PAGES; - iobuf->pagelist = iobuf->page_array; iobuf->maplist = iobuf->map_array; *bufp++ = iobuf; } @@ -65,50 +64,35 @@ void free_kiovec(int nr, struct kiobuf **bufp) for (i = 0; i < nr; i++) { iobuf = bufp[i]; - if (iobuf->array_len > KIO_STATIC_PAGES) { - kfree (iobuf->pagelist); + if (iobuf->array_len > KIO_STATIC_PAGES) kfree (iobuf->maplist); - } kmem_cache_free(kiobuf_cachep, bufp[i]); } } int expand_kiobuf(struct kiobuf *iobuf, int wanted) { - unsigned long * pagelist; struct page ** maplist; if (iobuf->array_len >= wanted) return 0; - pagelist = (unsigned long *) - kmalloc(wanted * sizeof(unsigned long), GFP_KERNEL); - if (!pagelist) - return -ENOMEM; - maplist = (struct page **) kmalloc(wanted * sizeof(struct page **), GFP_KERNEL); - if (!maplist) { - kfree(pagelist); + if (!maplist) return -ENOMEM; - } /* Did it grow while we waited? 
*/ if (iobuf->array_len >= wanted) { - kfree(pagelist); kfree(maplist); return 0; } - memcpy (pagelist, iobuf->pagelist, wanted * sizeof(unsigned long)); memcpy (maplist, iobuf->maplist, wanted * sizeof(struct page **)); - if (iobuf->array_len > KIO_STATIC_PAGES) { - kfree (iobuf->pagelist); + if (iobuf->array_len > KIO_STATIC_PAGES) kfree (iobuf->maplist); - } - iobuf->pagelist = pagelist; iobuf->maplist = maplist; iobuf->array_len = wanted; return 0; diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c index 6515e0d523c9..b7ec225ac8b7 100644 --- a/fs/nfs/dir.c +++ b/fs/nfs/dir.c @@ -308,8 +308,7 @@ static struct page *try_to_get_dirent_page(struct file *file, __u32 cookie, int struct nfs_readdirres rd_res; struct dentry *dentry = file->f_dentry; struct inode *inode = dentry->d_inode; - struct page *page, **hash; - unsigned long page_cache; + struct page *page, **hash, *page_cache; long offset; __u32 *cookiep; @@ -341,14 +340,14 @@ repeat: goto unlock_out; } - page = page_cache_entry(page_cache); + page = page_cache; if (add_to_page_cache_unique(page, inode, offset, hash)) { page_cache_release(page); goto repeat; } rd_args.fh = NFS_FH(dentry); - rd_res.buffer = (char *)page_cache; + rd_res.buffer = (char *)page_address(page_cache); rd_res.bufsiz = PAGE_CACHE_SIZE; rd_res.cookie = *cookiep; do { diff --git a/fs/nfs/symlink.c b/fs/nfs/symlink.c index 6cd892740597..6b0d0f05b904 100644 --- a/fs/nfs/symlink.c +++ b/fs/nfs/symlink.c @@ -59,8 +59,7 @@ struct inode_operations nfs_symlink_inode_operations = { static struct page *try_to_get_symlink_page(struct dentry *dentry, struct inode *inode) { struct nfs_readlinkargs rl_args; - struct page *page, **hash; - unsigned long page_cache; + struct page *page, **hash, *page_cache; page = NULL; page_cache = page_cache_alloc(); @@ -75,7 +74,7 @@ repeat: goto unlock_out; } - page = page_cache_entry(page_cache); + page = page_cache; if (add_to_page_cache_unique(page, inode, 0, hash)) { page_cache_release(page); goto repeat; @@ -86,7 +85,7 @@ 
repeat: * XDR response verification will NULL terminate it. */ rl_args.fh = NFS_FH(dentry); - rl_args.buffer = (const void *)page_cache; + rl_args.buffer = (const void *)page_address(page_cache); if (rpc_call(NFS_CLIENT(inode), NFSPROC_READLINK, &rl_args, NULL, 0) < 0) goto error; diff --git a/fs/proc/array.c b/fs/proc/array.c index f4cd01a7c4e9..101a0ad9c0bd 100644 --- a/fs/proc/array.c +++ b/fs/proc/array.c @@ -386,8 +386,8 @@ static int get_meminfo(char * buffer) i.sharedram >> 10, i.bufferram >> 10, atomic_read(&page_cache_size) << (PAGE_SHIFT - 10), - i.totalbig >> 10, - i.freebig >> 10, + i.totalhigh >> 10, + i.freehigh >> 10, i.totalswap >> 10, i.freeswap >> 10); } @@ -407,7 +407,7 @@ static int get_cmdline(char * buffer) return sprintf(buffer, "%s\n", saved_command_line); } -static unsigned long get_phys_addr(struct mm_struct * mm, unsigned long ptr) +static struct page * get_phys_page(struct mm_struct * mm, unsigned long ptr) { pgd_t *page_dir; pmd_t *page_middle; @@ -434,41 +434,41 @@ static unsigned long get_phys_addr(struct mm_struct * mm, unsigned long ptr) pte = *pte_offset(page_middle,ptr); if (!pte_present(pte)) return 0; - return pte_page(pte) + (ptr & ~PAGE_MASK); + return pte_page(pte); } -#include +#include static int get_array(struct mm_struct *mm, unsigned long start, unsigned long end, char * buffer) { unsigned long addr; int size = 0, result = 0; - char c; + char *buf, c; if (start >= end) return result; for (;;) { - addr = get_phys_addr(mm, start); - if (!addr) + struct page *page = get_phys_page(mm, start); + if (!page) return result; - addr = kmap(addr, KM_READ); + addr = kmap(page, KM_READ); + buf = (char *) (addr + (start & ~PAGE_MASK)); do { - c = *(char *) addr; + c = *buf; if (!c) result = size; - if (size < PAGE_SIZE) - buffer[size++] = c; - else { + if (size >= PAGE_SIZE) { kunmap(addr, KM_READ); return result; } - addr++; + buffer[size++] = c; + buf++; start++; if (!c && start >= end) { kunmap(addr, KM_READ); return result; } - } 
while (addr & ~PAGE_MASK); + } while (~PAGE_MASK & (unsigned long)buf); kunmap(addr, KM_READ); } return result; diff --git a/fs/proc/mem.c b/fs/proc/mem.c index f9fcb0970cc4..90cd797224df 100644 --- a/fs/proc/mem.c +++ b/fs/proc/mem.c @@ -10,7 +10,7 @@ #include #include #include -#include +#include #include #include @@ -79,9 +79,10 @@ static ssize_t mem_read(struct file * file, char * buf, pgd_t *page_dir; pmd_t *page_middle; pte_t pte; - char * page; + struct page * page; struct task_struct * tsk; unsigned long addr; + unsigned long maddr; /* temporary mapped address */ char *tmp; ssize_t scount, i; @@ -102,7 +103,7 @@ static ssize_t mem_read(struct file * file, char * buf, if (pgd_none(*page_dir)) break; if (pgd_bad(*page_dir)) { - printk("Bad page dir entry %08lx\n", pgd_val(*page_dir)); + pgd_ERROR(*page_dir); pgd_clear(page_dir); break; } @@ -110,20 +111,20 @@ static ssize_t mem_read(struct file * file, char * buf, if (pmd_none(*page_middle)) break; if (pmd_bad(*page_middle)) { - printk("Bad page middle entry %08lx\n", pmd_val(*page_middle)); + pmd_ERROR(*page_middle); pmd_clear(page_middle); break; } pte = *pte_offset(page_middle,addr); if (!pte_present(pte)) break; - page = (char *) pte_page(pte) + (addr & ~PAGE_MASK); + page = pte_page(pte); i = PAGE_SIZE-(addr & ~PAGE_MASK); if (i > scount) i = scount; - page = (char *) kmap((unsigned long) page, KM_READ); - copy_to_user(tmp, page, i); - kunmap((unsigned long) page, KM_READ); + maddr = kmap(page, KM_READ); + copy_to_user(tmp, (char *)maddr + (addr & ~PAGE_MASK), i); + kunmap(maddr, KM_READ); addr += i; tmp += i; scount -= i; @@ -141,9 +142,10 @@ static ssize_t mem_write(struct file * file, char * buf, pgd_t *page_dir; pmd_t *page_middle; pte_t pte; - char * page; + struct page * page; struct task_struct * tsk; unsigned long addr; + unsigned long maddr; /* temporary mapped address */ char *tmp; long i; @@ -159,7 +161,7 @@ static ssize_t mem_write(struct file * file, char * buf, if (pgd_none(*page_dir)) 
break; if (pgd_bad(*page_dir)) { - printk("Bad page dir entry %08lx\n", pgd_val(*page_dir)); + pgd_ERROR(*page_dir); pgd_clear(page_dir); break; } @@ -167,7 +169,7 @@ static ssize_t mem_write(struct file * file, char * buf, if (pmd_none(*page_middle)) break; if (pmd_bad(*page_middle)) { - printk("Bad page middle entry %08lx\n", pmd_val(*page_middle)); + pmd_ERROR(*page_middle); pmd_clear(page_middle); break; } @@ -176,13 +178,13 @@ static ssize_t mem_write(struct file * file, char * buf, break; if (!pte_write(pte)) break; - page = (char *) pte_page(pte) + (addr & ~PAGE_MASK); + page = pte_page(pte); i = PAGE_SIZE-(addr & ~PAGE_MASK); if (i > count) i = count; - page = (unsigned long) kmap((unsigned long) page, KM_WRITE); - copy_from_user(page, tmp, i); - kunmap((unsigned long) page, KM_WRITE); + maddr = kmap(page, KM_WRITE); + copy_from_user((char *)maddr + (addr & ~PAGE_MASK), tmp, i); + kunmap(maddr, KM_WRITE); addr += i; tmp += i; count -= i; @@ -248,14 +250,14 @@ int mem_mmap(struct file * file, struct vm_area_struct * vma) if (pgd_none(*src_dir)) return -EINVAL; if (pgd_bad(*src_dir)) { - printk("Bad source page dir entry %08lx\n", pgd_val(*src_dir)); + pgd_ERROR(*src_dir); return -EINVAL; } src_middle = pmd_offset(src_dir, stmp); if (pmd_none(*src_middle)) return -EINVAL; if (pmd_bad(*src_middle)) { - printk("Bad source page middle entry %08lx\n", pmd_val(*src_middle)); + pmd_ERROR(*src_middle); return -EINVAL; } src_table = pte_offset(src_middle, stmp); @@ -301,9 +303,9 @@ int mem_mmap(struct file * file, struct vm_area_struct * vma) set_pte(src_table, pte_mkdirty(*src_table)); set_pte(dest_table, *src_table); - mapnr = MAP_NR(pte_page(*src_table)); + mapnr = pte_pagenr(*src_table); if (mapnr < max_mapnr) - get_page(mem_map + MAP_NR(pte_page(*src_table))); + get_page(mem_map + pte_pagenr(*src_table)); stmp += PAGE_SIZE; dtmp += PAGE_SIZE; diff --git a/include/asm-i386/bigmem.h b/include/asm-i386/bigmem.h deleted file mode 100644 index 
1c5c4cf4b4d4..000000000000 --- a/include/asm-i386/bigmem.h +++ /dev/null @@ -1,69 +0,0 @@ -/* - * bigmem.h: virtual kernel memory mappings for big memory - * - * Used in CONFIG_BIGMEM systems for memory pages which are not - * addressable by direct kernel virtual adresses. - * - * Copyright (C) 1999 Gerhard Wichert, Siemens AG - * Gerhard.Wichert@pdb.siemens.de - */ - -#ifndef _ASM_BIGMEM_H -#define _ASM_BIGMEM_H - -#include - -#define BIGMEM_DEBUG /* undef for production */ - -/* declarations for bigmem.c */ -extern unsigned long bigmem_start, bigmem_end; -extern int nr_free_bigpages; - -extern pte_t *kmap_pte; -extern pgprot_t kmap_prot; - -extern void kmap_init(void) __init; - -/* kmap helper functions necessary to access the bigmem pages in kernel */ -#include -#include - -extern inline unsigned long kmap(unsigned long kaddr, enum km_type type) -{ - if (__pa(kaddr) < bigmem_start) - return kaddr; - { - enum fixed_addresses idx = type+KM_TYPE_NR*smp_processor_id(); - unsigned long vaddr = __fix_to_virt(FIX_KMAP_BEGIN+idx); - -#ifdef BIGMEM_DEBUG - if (!pte_none(*(kmap_pte-idx))) - { - __label__ here; - here: - printk(KERN_ERR "not null pte on CPU %d from %p\n", - smp_processor_id(), &&here); - } -#endif - set_pte(kmap_pte-idx, mk_pte(kaddr & PAGE_MASK, kmap_prot)); - __flush_tlb_one(vaddr); - - return vaddr | (kaddr & ~PAGE_MASK); - } -} - -extern inline void kunmap(unsigned long vaddr, enum km_type type) -{ -#ifdef BIGMEM_DEBUG - enum fixed_addresses idx = type+KM_TYPE_NR*smp_processor_id(); - if ((vaddr & PAGE_MASK) == __fix_to_virt(FIX_KMAP_BEGIN+idx)) - { - /* force other mappings to Oops if they'll try to access - this pte without first remap it */ - pte_clear(kmap_pte-idx); - __flush_tlb_one(vaddr); - } -#endif -} - -#endif /* _ASM_BIGMEM_H */ diff --git a/include/asm-i386/bugs.h b/include/asm-i386/bugs.h index 1914385eb940..4ae25be50754 100644 --- a/include/asm-i386/bugs.h +++ b/include/asm-i386/bugs.h @@ -236,6 +236,7 @@ static void __init 
check_amd_k6(void) * have the F0 0F bug, which lets nonpriviledged users lock up the system: */ +#ifndef CONFIG_M686 extern void trap_init_f00f_bug(void); static void __init check_pentium_f00f(void) @@ -250,6 +251,7 @@ static void __init check_pentium_f00f(void) trap_init_f00f_bug(); } } +#endif /* * Perform the Cyrix 5/2 test. A Cyrix won't change @@ -424,7 +426,9 @@ static void __init check_bugs(void) check_hlt(); check_popad(); check_amd_k6(); +#ifndef CONFIG_M686 check_pentium_f00f(); +#endif check_cyrix_coma(); system_utsname.machine[1] = '0' + boot_cpu_data.x86; } diff --git a/include/asm-i386/fixmap.h b/include/asm-i386/fixmap.h index 34c82dbe01d0..01f6a1871421 100644 --- a/include/asm-i386/fixmap.h +++ b/include/asm-i386/fixmap.h @@ -17,7 +17,7 @@ #include #include #include -#ifdef CONFIG_BIGMEM +#ifdef CONFIG_HIGHMEM #include #include #endif @@ -34,7 +34,7 @@ * * these 'compile-time allocated' memory buffers are * fixed-size 4k pages. (or larger if used with an increment - * bigger than 1) use fixmap_set(idx,phys) to associate + * bigger than 1) use fixmap_set(idx,phys) to associate * physical memory with fixmap indices. * * TLB entries of such buffers will not be flushed across @@ -61,7 +61,7 @@ enum fixed_addresses { FIX_LI_PCIA, /* Lithium PCI Bridge A */ FIX_LI_PCIB, /* Lithium PCI Bridge B */ #endif -#ifdef CONFIG_BIGMEM +#ifdef CONFIG_HIGHMEM FIX_KMAP_BEGIN, /* reserved pte's for temporary kernel mappings */ FIX_KMAP_END = FIX_KMAP_BEGIN+(KM_TYPE_NR*NR_CPUS)-1, #endif diff --git a/include/asm-i386/highmem.h b/include/asm-i386/highmem.h new file mode 100644 index 000000000000..bd5564aea8c9 --- /dev/null +++ b/include/asm-i386/highmem.h @@ -0,0 +1,85 @@ +/* + * highmem.h: virtual kernel memory mappings for high memory + * + * Used in CONFIG_HIGHMEM systems for memory pages which + * are not addressable by direct kernel virtual addresses.
+ * + * Copyright (C) 1999 Gerhard Wichert, Siemens AG + * Gerhard.Wichert@pdb.siemens.de + * + * + * Redesigned the x86 32-bit VM architecture to deal with + * up to 16 Terabyte physical memory. With current x86 CPUs + * we now support up to 64 Gigabytes physical RAM. + * + * Copyright (C) 1999 Ingo Molnar + */ + +#ifndef _ASM_HIGHMEM_H +#define _ASM_HIGHMEM_H + +#include + +/* undef for production */ +#define HIGHMEM_DEBUG 1 + +/* declarations for highmem.c */ +extern unsigned long highstart_pfn, highend_pfn; + +extern pte_t *kmap_pte; +extern pgprot_t kmap_prot; + +extern void kmap_init(void) __init; + +/* kmap helper functions necessary to access the highmem pages in kernel */ +#include +#include + +extern inline unsigned long kmap(struct page *page, enum km_type type) +{ + if (page < highmem_start_page) + return page_address(page); + { + enum fixed_addresses idx = type+KM_TYPE_NR*smp_processor_id(); + unsigned long vaddr = __fix_to_virt(FIX_KMAP_BEGIN+idx); + +#if HIGHMEM_DEBUG + if (!pte_none(*(kmap_pte-idx))) + { + __label__ here; + here: + printk(KERN_ERR "not null pte on CPU %d from %p\n", + smp_processor_id(), &&here); + } +#endif + set_pte(kmap_pte-idx, mk_pte(page, kmap_prot)); + __flush_tlb_one(vaddr); + + return vaddr; + } +} + +extern inline void kunmap(unsigned long vaddr, enum km_type type) +{ +#if HIGHMEM_DEBUG + enum fixed_addresses idx = type+KM_TYPE_NR*smp_processor_id(); + if ((vaddr & PAGE_MASK) == __fix_to_virt(FIX_KMAP_BEGIN+idx)) + { + /* force other mappings to Oops if they try to access + this pte without first remapping it */ + pte_clear(kmap_pte-idx); + __flush_tlb_one(vaddr); + } +#endif +} + +extern inline void kmap_check(void) +{ +#if HIGHMEM_DEBUG + int idx_base = KM_TYPE_NR*smp_processor_id(), i; + for (i = idx_base; i < idx_base+KM_TYPE_NR; i++) + if (!pte_none(*(kmap_pte-i))) + BUG(); +#endif +} +#endif /* _ASM_HIGHMEM_H */ diff --git a/include/asm-i386/io.h b/include/asm-i386/io.h index 906fca475d84..95cbb151df76 100644 ---
a/include/asm-i386/io.h +++ b/include/asm-i386/io.h @@ -103,28 +103,27 @@ __OUTS(l) #include #include -#define __io_virt(x) ((void *)(PAGE_OFFSET | (unsigned long)(x))) -#define __io_phys(x) ((unsigned long)(x) & ~PAGE_OFFSET) +/* + * Temporary debugging check to catch old code using + * unmapped ISA addresses. Will be removed in 2.4. + */ +#define __io_virt(x) ((unsigned long)(x) < PAGE_OFFSET ? \ + ({ __label__ __l; __l: printk("io mapaddr %p not valid at %p!\n", (char *)(x), &&__l); __va(x); }) : (char *)(x)) +#define __io_phys(x) ((unsigned long)(x) < PAGE_OFFSET ? \ + ({ __label__ __l; __l: printk("io mapaddr %p not valid at %p!\n", (char *)(x), &&__l); (unsigned long)(x); }) : __pa(x)) + /* * Change virtual addresses to physical addresses and vv. * These are pretty trivial */ extern inline unsigned long virt_to_phys(volatile void * address) { -#ifdef CONFIG_BIGMEM return __pa(address); -#else - return __io_phys(address); -#endif } extern inline void * phys_to_virt(unsigned long address) { -#ifdef CONFIG_BIGMEM return __va(address); -#else - return __io_virt(address); -#endif } extern void * __ioremap(unsigned long offset, unsigned long size, unsigned long flags); @@ -177,6 +176,23 @@ extern void iounmap(void *addr); #define memcpy_fromio(a,b,c) memcpy((a),__io_virt(b),(c)) #define memcpy_toio(a,b,c) memcpy(__io_virt(a),(b),(c)) +/* + * ISA space is 'always mapped' on a typical x86 system, no need to + * explicitly ioremap() it. The fact that the ISA IO space is mapped + * to PAGE_OFFSET is pure coincidence - it does not mean ISA values + * are physical addresses. 
The following constant pointer can be + * used as the IO-area pointer (it can be iounmapped as well, so the + * analogy with PCI is quite large): + */ +#define __ISA_IO_base ((char *)(PAGE_OFFSET)) + +#define isa_readb(a) readb(__ISA_IO_base + (a)) +#define isa_readw(a) readw(__ISA_IO_base + (a)) +#define isa_readl(a) readl(__ISA_IO_base + (a)) +#define isa_writeb(b,a) writeb(b,__ISA_IO_base + (a)) +#define isa_writew(w,a) writew(w,__ISA_IO_base + (a)) +#define isa_writel(l,a) writel(l,__ISA_IO_base + (a)) + /* * Again, i386 does not require mem IO specific function. */ diff --git a/include/asm-i386/page.h b/include/asm-i386/page.h index 1eb4ac093688..11577168af04 100644 --- a/include/asm-i386/page.h +++ b/include/asm-i386/page.h @@ -9,8 +9,6 @@ #ifdef __KERNEL__ #ifndef __ASSEMBLY__ -#define STRICT_MM_TYPECHECKS - #include #ifdef CONFIG_X86_USE_3DNOW @@ -32,13 +30,19 @@ #endif -#ifdef STRICT_MM_TYPECHECKS /* * These are used to make use of C type-checking.. */ +#if CONFIG_X86_PAE +typedef struct { unsigned long long pte; } pte_t; +typedef struct { unsigned long long pmd; } pmd_t; +typedef struct { unsigned long long pgd; } pgd_t; +#else typedef struct { unsigned long pte; } pte_t; typedef struct { unsigned long pmd; } pmd_t; typedef struct { unsigned long pgd; } pgd_t; +#endif + typedef struct { unsigned long pgprot; } pgprot_t; #define pte_val(x) ((x).pte) @@ -51,26 +55,6 @@ typedef struct { unsigned long pgprot; } pgprot_t; #define __pgd(x) ((pgd_t) { (x) } ) #define __pgprot(x) ((pgprot_t) { (x) } ) -#else -/* - * ..
while these make it easier on the compiler - */ -typedef unsigned long pte_t; -typedef unsigned long pmd_t; -typedef unsigned long pgd_t; -typedef unsigned long pgprot_t; - -#define pte_val(x) (x) -#define pmd_val(x) (x) -#define pgd_val(x) (x) -#define pgprot_val(x) (x) - -#define __pte(x) (x) -#define __pmd(x) (x) -#define __pgd(x) (x) -#define __pgprot(x) (x) - -#endif #endif /* !__ASSEMBLY__ */ /* to align the pointer to the (next) page boundary */ @@ -93,8 +77,16 @@ typedef unsigned long pgprot_t; #ifndef __ASSEMBLY__ +extern int console_loglevel; + +/* + * Tell the user there is some problem. Beep too, so we can + * see^H^H^Hhear bugs in early bootup as well! + */ #define BUG() do { \ + __asm__ __volatile__ ("movb $0x3,%al; outb %al,$0x61"); \ printk("kernel BUG at %s:%d!\n", __FILE__, __LINE__); \ + console_loglevel = 0; \ __asm__ __volatile__(".byte 0x0f,0x0b"); \ } while (0) diff --git a/include/asm-i386/pgtable-2level.h b/include/asm-i386/pgtable-2level.h new file mode 100644 index 000000000000..a8a05239cde1 --- /dev/null +++ b/include/asm-i386/pgtable-2level.h @@ -0,0 +1,62 @@ +#ifndef _I386_PGTABLE_2LEVEL_H +#define _I386_PGTABLE_2LEVEL_H + +/* + * traditional i386 two-level paging structure: + */ + +#define PGDIR_SHIFT 22 +#define PTRS_PER_PGD 1024 + +/* + * the i386 is two-level, so we don't really have any + * PMD directory physically. 
+ */ +#define PMD_SHIFT 22 +#define PTRS_PER_PMD 1 + +#define PTRS_PER_PTE 1024 + +#define pte_ERROR(e) \ + printk("%s:%d: bad pte %08lx.\n", __FILE__, __LINE__, pte_val(e)) +#define pmd_ERROR(e) \ + printk("%s:%d: bad pmd %08lx.\n", __FILE__, __LINE__, pmd_val(e)) +#define pgd_ERROR(e) \ + printk("%s:%d: bad pgd %08lx.\n", __FILE__, __LINE__, pgd_val(e)) + +/* + * The "pgd_xxx()" functions here are trivial for a folded two-level + * setup: the pgd is never bad, and a pmd always exists (as it's folded + * into the pgd entry) + */ +extern inline int pgd_none(pgd_t pgd) { return 0; } +extern inline int pgd_bad(pgd_t pgd) { return 0; } +extern inline int pgd_present(pgd_t pgd) { return 1; } +#define pgd_clear(xp) do { pgd_val(*(xp)) = 0; } while (0) + +#define pgd_page(pgd) \ +((unsigned long) __va(pgd_val(pgd) & PAGE_MASK)) + +extern inline pmd_t * pmd_offset(pgd_t * dir, unsigned long address) +{ + return (pmd_t *) dir; +} + +extern __inline__ pmd_t *get_pmd_fast(void) +{ + return (pmd_t *)0; +} + +extern __inline__ void free_pmd_fast(pmd_t *pmd) { } +extern __inline__ void free_pmd_slow(pmd_t *pmd) { } + +extern inline pmd_t * pmd_alloc(pgd_t *pgd, unsigned long address) +{ + if (!pgd) + BUG(); + return (pmd_t *) pgd; +} + +#define SWP_ENTRY(type,offset) __pte((((type) << 1) | ((offset) << 8))) + +#endif /* _I386_PGTABLE_2LEVEL_H */ diff --git a/include/asm-i386/pgtable-3level.h b/include/asm-i386/pgtable-3level.h new file mode 100644 index 000000000000..99d718115d11 --- /dev/null +++ b/include/asm-i386/pgtable-3level.h @@ -0,0 +1,131 @@ +#ifndef _I386_PGTABLE_3LEVEL_H +#define _I386_PGTABLE_3LEVEL_H + +/* + * Intel Physical Address Extension (PAE) Mode - three-level page + * tables on PPro+ CPUs. 
+ * + * Copyright (C) 1999 Ingo Molnar + */ + +/* + * PGDIR_SHIFT determines what a top-level page table entry can map + */ +#define PGDIR_SHIFT 30 +#define PTRS_PER_PGD 4 + +/* + * PMD_SHIFT determines the size of the area a middle-level + * page table can map + */ +#define PMD_SHIFT 21 +#define PTRS_PER_PMD 512 + +/* + * entries per page directory level + */ +#define PTRS_PER_PTE 512 + +#define pte_ERROR(e) \ + printk("%s:%d: bad pte %016Lx.\n", __FILE__, __LINE__, pte_val(e)) +#define pmd_ERROR(e) \ + printk("%s:%d: bad pmd %016Lx.\n", __FILE__, __LINE__, pmd_val(e)) +#define pgd_ERROR(e) \ + printk("%s:%d: bad pgd %016Lx.\n", __FILE__, __LINE__, pgd_val(e)) + +/* + * Subtle, in PAE mode we cannot have zeroes in the top level + * page directory, the CPU enforces this. + */ +#define pgd_none(x) (pgd_val(x) == 1ULL) +extern inline int pgd_bad(pgd_t pgd) { return 0; } +extern inline int pgd_present(pgd_t pgd) { return !pgd_none(pgd); } +/* + * Pentium-II errata A13: in PAE mode we explicitly have to flush + * the TLB via cr3 if the top-level pgd is changed... This was one tough + * thing to find out - guess i should first read all the documentation + * next time around ;) + */ +extern inline void __pgd_clear (pgd_t * pgd) +{ + pgd_val(*pgd) = 1; // no zero allowed! +} + +extern inline void pgd_clear (pgd_t * pgd) +{ + __pgd_clear(pgd); + __flush_tlb(); +} + +#define pgd_page(pgd) \ +((unsigned long) __va(pgd_val(pgd) & PAGE_MASK)) + +/* Find an entry in the second-level page table.. 
*/ +#define pmd_offset(dir, address) ((pmd_t *) pgd_page(*(dir)) + \ + __pmd_offset(address)) + +extern __inline__ pmd_t *get_pmd_slow(void) +{ + pmd_t *ret = (pmd_t *)__get_free_page(GFP_KERNEL); + + if (ret) + memset(ret, 0, PAGE_SIZE); + return ret; +} + +extern __inline__ pmd_t *get_pmd_fast(void) +{ + unsigned long *ret; + + if ((ret = pmd_quicklist) != NULL) { + pmd_quicklist = (unsigned long *)(*ret); + ret[0] = 0; + pgtable_cache_size--; + } else + ret = (unsigned long *)get_pmd_slow(); + return (pmd_t *)ret; +} + +extern __inline__ void free_pmd_fast(pmd_t *pmd) +{ + *(unsigned long *)pmd = (unsigned long) pmd_quicklist; + pmd_quicklist = (unsigned long *) pmd; + pgtable_cache_size++; +} + +extern __inline__ void free_pmd_slow(pmd_t *pmd) +{ + free_page((unsigned long)pmd); +} + +extern inline pmd_t * pmd_alloc(pgd_t *pgd, unsigned long address) +{ + if (!pgd) + BUG(); + address = (address >> PMD_SHIFT) & (PTRS_PER_PMD - 1); + if (pgd_none(*pgd)) { + pmd_t *page = get_pmd_fast(); + + if (!page) + page = get_pmd_slow(); + if (page) { + if (pgd_none(*pgd)) { + pgd_val(*pgd) = 1 + __pa(page); + __flush_tlb(); + return page + address; + } else + free_pmd_fast(page); + } else + return NULL; + } + return (pmd_t *)pgd_page(*pgd) + address; +} + +/* + * Subtle. offset can overflow 32 bits and that's a feature - we can do + * up to 16 TB swap on PAE. (Not that anyone should need that much + * swapspace, but who knows?) 
+ */ +#define SWP_ENTRY(type,offset) __pte((((type) << 1) | ((unsigned long long)(offset) << 8))) + +#endif /* _I386_PGTABLE_3LEVEL_H */ diff --git a/include/asm-i386/pgtable.h b/include/asm-i386/pgtable.h index aea0cd14b9c4..fdc7b0107009 100644 --- a/include/asm-i386/pgtable.h +++ b/include/asm-i386/pgtable.h @@ -100,44 +100,50 @@ static inline void flush_tlb_range(struct mm_struct * mm, unsigned long start, u flush_tlb_mm(mm); } - #endif #endif /* !__ASSEMBLY__ */ +#define pgd_quicklist (current_cpu_data.pgd_quick) +#define pmd_quicklist (current_cpu_data.pmd_quick) +#define pte_quicklist (current_cpu_data.pte_quick) +#define pgtable_cache_size (current_cpu_data.pgtable_cache_sz) + +/* + * The Linux x86 paging architecture is 'compile-time dual-mode', it + * implements both the traditional 2-level x86 page tables and the + * newer 3-level PAE-mode page tables. + */ +#ifndef __ASSEMBLY__ +#if CONFIG_X86_PAE +# include +#else +# include +#endif +#endif -/* Certain architectures need to do special things when PTEs +/* + * Certain architectures need to do special things when PTEs * within a page table are directly modified. Thus, the following * hook is made available. */ #define set_pte(pteptr, pteval) ((*(pteptr)) = (pteval)) -/* PMD_SHIFT determines the size of the area a second-level page table can map */ -#define PMD_SHIFT 22 +#define __beep() asm("movb $0x3,%al; outb %al,$0x61") + #define PMD_SIZE (1UL << PMD_SHIFT) #define PMD_MASK (~(PMD_SIZE-1)) - -/* PGDIR_SHIFT determines what a third-level page table entry can map */ -#define PGDIR_SHIFT 22 #define PGDIR_SIZE (1UL << PGDIR_SHIFT) #define PGDIR_MASK (~(PGDIR_SIZE-1)) -/* - * entries per page directory level: the i386 is two-level, so - * we don't really have any PMD directory physically.
- */ -#define PTRS_PER_PTE 1024 -#define PTRS_PER_PMD 1 -#define PTRS_PER_PGD 1024 #define USER_PTRS_PER_PGD (TASK_SIZE/PGDIR_SIZE) -/* - * pgd entries used up by user/kernel: - */ - #define USER_PGD_PTRS (PAGE_OFFSET >> PGDIR_SHIFT) #define KERNEL_PGD_PTRS (PTRS_PER_PGD-USER_PGD_PTRS) -#define __USER_PGD_PTRS ((__PAGE_OFFSET >> PGDIR_SHIFT) & 0x3ff) -#define __KERNEL_PGD_PTRS (PTRS_PER_PGD-__USER_PGD_PTRS) + +#define TWOLEVEL_PGDIR_SHIFT 22 +#define BOOT_USER_PGD_PTRS (__PAGE_OFFSET >> TWOLEVEL_PGDIR_SHIFT) +#define BOOT_KERNEL_PGD_PTRS (1024-BOOT_USER_PGD_PTRS) + #ifndef __ASSEMBLY__ /* Just any arbitrary offset to the start of the vmalloc VM area: the @@ -166,7 +172,7 @@ static inline void flush_tlb_range(struct mm_struct * mm, unsigned long start, u #define _PAGE_PCD 0x010 #define _PAGE_ACCESSED 0x020 #define _PAGE_DIRTY 0x040 -#define _PAGE_4M 0x080 /* 4 MB page, Pentium+, if present.. */ +#define _PAGE_PSE 0x080 /* 4 MB (or 2MB) page, Pentium+, if present.. */ #define _PAGE_GLOBAL 0x100 /* Global TLB entry PPro+ */ #define _PAGE_PROTNONE 0x080 /* If not present */ @@ -213,40 +219,24 @@ static inline void flush_tlb_range(struct mm_struct * mm, unsigned long start, u /* page table for 0-4MB for everybody */ extern unsigned long pg0[1024]; -/* zero page used for uninitialized stuff */ -extern unsigned long empty_zero_page[1024]; /* - * BAD_PAGETABLE is used when we need a bogus page-table, while - * BAD_PAGE is used for a bogus page. - * * ZERO_PAGE is a global shared page that is always zero: used * for zero-mapped memory areas etc.. 
*/ -extern pte_t __bad_page(void); -extern pte_t * __bad_pagetable(void); - -#define BAD_PAGETABLE __bad_pagetable() -#define BAD_PAGE __bad_page() -#define ZERO_PAGE(vaddr) ((unsigned long) empty_zero_page) - -/* number of bits that fit into a memory pointer */ -#define BITS_PER_PTR (8*sizeof(unsigned long)) - -/* to align the pointer to a pointer address */ -#define PTR_MASK (~(sizeof(void*)-1)) - -/* sizeof(void*)==1<>(PAGE_SHIFT-SIZEOF_PTR_LOG2)&PTR_MASK&~PAGE_MASK) +/* + * Handling allocation failures during page table setup. + */ +extern void __handle_bad_pmd(pmd_t * pmd); +extern void __handle_bad_pmd_kernel(pmd_t * pmd); #define pte_none(x) (!pte_val(x)) #define pte_present(x) (pte_val(x) & (_PAGE_PRESENT | _PAGE_PROTNONE)) #define pte_clear(xp) do { pte_val(*(xp)) = 0; } while (0) +#define pte_pagenr(x) ((unsigned long)((pte_val(x) >> PAGE_SHIFT))) #define pmd_none(x) (!pmd_val(x)) #define pmd_bad(x) ((pmd_val(x) & (~PAGE_MASK & ~_PAGE_USER)) != _KERNPG_TABLE) @@ -254,14 +244,12 @@ extern pte_t * __bad_pagetable(void); #define pmd_clear(xp) do { pmd_val(*(xp)) = 0; } while (0) /* - * The "pgd_xxx()" functions here are trivial for a folded two-level - * setup: the pgd is never bad, and a pmd always exists (as it's folded - * into the pgd entry) + * Permanent address of a page. Obviously must never be + * called on a highmem page. */ -extern inline int pgd_none(pgd_t pgd) { return 0; } -extern inline int pgd_bad(pgd_t pgd) { return 0; } -extern inline int pgd_present(pgd_t pgd) { return 1; } -extern inline void pgd_clear(pgd_t * pgdp) { } +#define page_address(page) ({ if (PageHighMem(page)) BUG(); PAGE_OFFSET + (((page) - mem_map) << PAGE_SHIFT); }) +#define pages_to_mb(x) ((x) >> (20-PAGE_SHIFT)) +#define pte_page(x) (mem_map+pte_pagenr(x)) /* * The following only work if pte_present() is true. 
@@ -288,8 +276,15 @@ extern inline pte_t pte_mkwrite(pte_t pte) { pte_val(pte) |= _PAGE_RW; return pt * Conversion functions: convert a page and protection to a page entry, * and a page entry and page directory to the page they refer to. */ -#define mk_pte(page, pgprot) \ -({ pte_t __pte; pte_val(__pte) = __pa(page) + pgprot_val(pgprot); __pte; }) + +extern inline pte_t mk_pte(struct page *page, pgprot_t pgprot) +{ + pte_t __pte; + + pte_val(__pte) = (page-mem_map)*(unsigned long long)PAGE_SIZE + + pgprot_val(pgprot); + return __pte; +} /* This takes a physical page address that is used by the remapping functions */ #define mk_pte_phys(physpage, pgprot) \ @@ -298,28 +293,29 @@ extern inline pte_t pte_mkwrite(pte_t pte) { pte_val(pte) |= _PAGE_RW; return pt extern inline pte_t pte_modify(pte_t pte, pgprot_t newprot) { pte_val(pte) = (pte_val(pte) & _PAGE_CHG_MASK) | pgprot_val(newprot); return pte; } -#define pte_page(pte) \ -((unsigned long) __va(pte_val(pte) & PAGE_MASK)) +#define page_pte_prot(page,prot) mk_pte(page, prot) +#define page_pte(page) page_pte_prot(page, __pgprot(0)) #define pmd_page(pmd) \ ((unsigned long) __va(pmd_val(pmd) & PAGE_MASK)) -/* to find an entry in a page-table-directory */ -#define pgd_offset(mm, address) \ -((mm)->pgd + ((address) >> PGDIR_SHIFT)) +/* to find an entry in a page-table-directory. */ +#define __pgd_offset(address) \ + ((address >> PGDIR_SHIFT) & (PTRS_PER_PGD-1)) + +#define pgd_offset(mm, address) ((mm)->pgd+__pgd_offset(address)) /* to find an entry in a kernel page-table-directory */ #define pgd_offset_k(address) pgd_offset(&init_mm, address) -/* Find an entry in the second-level page table.. */ -extern inline pmd_t * pmd_offset(pgd_t * dir, unsigned long address) -{ - return (pmd_t *) dir; -} +#define __pmd_offset(address) \ + (((address) >> PMD_SHIFT) & (PTRS_PER_PMD-1)) -/* Find an entry in the third-level page table.. 
*/ -#define pte_offset(pmd, address) \ -((pte_t *) (pmd_page(*pmd) + ((address>>10) & ((PTRS_PER_PTE-1)<<2)))) +/* Find an entry in the third-level page table.. */ +#define __pte_offset(address) \ + ((address >> PAGE_SHIFT) & (PTRS_PER_PTE - 1)) +#define pte_offset(dir, address) ((pte_t *) pmd_page(*(dir)) + \ + __pte_offset(address)) /* * Allocate and free page tables. The xxx_kernel() versions are @@ -327,17 +323,25 @@ extern inline pmd_t * pmd_offset(pgd_t * dir, unsigned long address) * if any. */ -#define pgd_quicklist (current_cpu_data.pgd_quick) -#define pmd_quicklist ((unsigned long *)0) -#define pte_quicklist (current_cpu_data.pte_quick) -#define pgtable_cache_size (current_cpu_data.pgtable_cache_sz) - extern __inline__ pgd_t *get_pgd_slow(void) { pgd_t *ret = (pgd_t *)__get_free_page(GFP_KERNEL); if (ret) { +#if 0 + /* + * On PAE allocating a whole page is overkill - we will + * either embedd this in mm_struct, or do a SLAB cache. + */ + memcpy(ret, swapper_pg_dir, PTRS_PER_PGD * sizeof(pgd_t)); +#endif +#if CONFIG_X86_PAE + int i; + for (i = 0; i < USER_PTRS_PER_PGD; i++) + __pgd_clear(ret + i); +#else memset(ret, 0, USER_PTRS_PER_PGD * sizeof(pgd_t)); +#endif memcpy(ret + USER_PTRS_PER_PGD, swapper_pg_dir + USER_PTRS_PER_PGD, (PTRS_PER_PGD - USER_PTRS_PER_PGD) * sizeof(pgd_t)); } return ret; @@ -395,30 +399,15 @@ extern __inline__ void free_pte_slow(pte_t *pte) free_page((unsigned long)pte); } -/* We don't use pmd cache, so these are dummy routines */ -extern __inline__ pmd_t *get_pmd_fast(void) -{ - return (pmd_t *)0; -} - -extern __inline__ void free_pmd_fast(pmd_t *pmd) -{ -} - -extern __inline__ void free_pmd_slow(pmd_t *pmd) -{ -} - -extern void __bad_pte(pmd_t *pmd); -extern void __bad_pte_kernel(pmd_t *pmd); - #define pte_free_kernel(pte) free_pte_slow(pte) -#define pte_free(pte) free_pte_slow(pte) -#define pgd_free(pgd) free_pgd_slow(pgd) -#define pgd_alloc() get_pgd_fast() +#define pte_free(pte) free_pte_slow(pte) +#define pgd_free(pgd) 
free_pgd_slow(pgd) +#define pgd_alloc() get_pgd_fast() extern inline pte_t * pte_alloc_kernel(pmd_t * pmd, unsigned long address) { + if (!pmd) + BUG(); address = (address >> PAGE_SHIFT) & (PTRS_PER_PTE - 1); if (pmd_none(*pmd)) { pte_t * page = (pte_t *) get_pte_fast(); @@ -429,7 +418,7 @@ extern inline pte_t * pte_alloc_kernel(pmd_t * pmd, unsigned long address) return page + address; } if (pmd_bad(*pmd)) { - __bad_pte_kernel(pmd); + __handle_bad_pmd_kernel(pmd); return NULL; } return (pte_t *) pmd_page(*pmd) + address; @@ -437,13 +426,13 @@ extern inline pte_t * pte_alloc_kernel(pmd_t * pmd, unsigned long address) extern inline pte_t * pte_alloc(pmd_t * pmd, unsigned long address) { - address = (address >> (PAGE_SHIFT-2)) & 4*(PTRS_PER_PTE - 1); + address = (address >> PAGE_SHIFT) & (PTRS_PER_PTE - 1); if (pmd_none(*pmd)) goto getnew; if (pmd_bad(*pmd)) goto fix; - return (pte_t *) (pmd_page(*pmd) + address); + return (pte_t *)pmd_page(*pmd) + address; getnew: { unsigned long page = (unsigned long) get_pte_fast(); @@ -451,25 +440,19 @@ getnew: if (!page) return get_pte_slow(pmd, address); pmd_val(*pmd) = _PAGE_TABLE + __pa(page); - return (pte_t *) (page + address); + return (pte_t *)page + address; } fix: - __bad_pte(pmd); + __handle_bad_pmd(pmd); return NULL; } /* * allocating and freeing a pmd is trivial: the 1-entry pmd is * inside the pgd, so has no extra memory associated with it. + * (In the PAE case we free the page.) 
*/ -extern inline void pmd_free(pmd_t * pmd) -{ -} - -extern inline pmd_t * pmd_alloc(pgd_t * pgd, unsigned long address) -{ - return (pmd_t *) pgd; -} +#define pmd_free(pmd) free_pmd_slow(pmd) #define pmd_free_kernel pmd_free #define pmd_alloc_kernel pmd_alloc @@ -483,7 +466,7 @@ extern inline void set_pgdir(unsigned long address, pgd_t entry) #ifdef __SMP__ int i; #endif - + read_lock(&tasklist_lock); for_each_task(p) { if (!p->mm) @@ -512,9 +495,8 @@ extern inline void update_mmu_cache(struct vm_area_struct * vma, { } -#define SWP_TYPE(entry) (((entry) >> 1) & 0x3f) -#define SWP_OFFSET(entry) ((entry) >> 8) -#define SWP_ENTRY(type,offset) (((type) << 1) | ((offset) << 8)) +#define SWP_TYPE(entry) (((pte_val(entry)) >> 1) & 0x3f) +#define SWP_OFFSET(entry) ((pte_val(entry)) >> 8) #define module_map vmalloc #define module_unmap vfree @@ -527,4 +509,4 @@ extern inline void update_mmu_cache(struct vm_area_struct * vma, #define io_remap_page_range remap_page_range -#endif /* _I386_PAGE_H */ +#endif /* _I386_PGTABLE_H */ diff --git a/include/asm-i386/processor.h b/include/asm-i386/processor.h index 939ca0b31706..88f06686407d 100644 --- a/include/asm-i386/processor.h +++ b/include/asm-i386/processor.h @@ -46,6 +46,7 @@ struct cpuinfo_x86 { int coma_bug; unsigned long loops_per_sec; unsigned long *pgd_quick; + unsigned long *pmd_quick; unsigned long *pte_quick; unsigned long pgtable_cache_sz; }; @@ -106,6 +107,12 @@ extern struct cpuinfo_x86 cpu_data[]; #define current_cpu_data boot_cpu_data #endif +#define cpu_has_pge \ + (boot_cpu_data.x86_capability & X86_FEATURE_PGE) +#define cpu_has_pse \ + (boot_cpu_data.x86_capability & X86_FEATURE_PSE) +#define cpu_has_pae \ + (boot_cpu_data.x86_capability & X86_FEATURE_PAE) #define cpu_has_tsc \ (cpu_data[smp_processor_id()].x86_capability & X86_FEATURE_TSC) diff --git a/include/asm-i386/smp.h b/include/asm-i386/smp.h index 52c27bead13d..2aa6aec4e4e4 100644 --- a/include/asm-i386/smp.h +++ b/include/asm-i386/smp.h @@ -166,7 
+166,8 @@ struct mpc_config_lintsrc extern int smp_found_config; extern void init_smp_config(void); -extern unsigned long smp_alloc_memory(unsigned long mem_base); +extern void init_smp_mappings(void); +extern void smp_alloc_memory(void); extern unsigned long cpu_present_map; extern unsigned long cpu_online_map; extern volatile unsigned long smp_invalidate_needed; @@ -179,6 +180,7 @@ extern void smp_invalidate_rcv(void); /* Process an NMI */ extern void smp_local_timer_interrupt(struct pt_regs * regs); extern void (*mtrr_hook) (void); extern void setup_APIC_clocks(void); +extern void zap_low_mappings (void); extern volatile int cpu_number_map[NR_CPUS]; extern volatile int __cpu_logical_map[NR_CPUS]; extern inline int cpu_logical_map(int cpu) diff --git a/include/linux/bigmem.h b/include/linux/bigmem.h deleted file mode 100644 index 289183bfe367..000000000000 --- a/include/linux/bigmem.h +++ /dev/null @@ -1,48 +0,0 @@ -#ifndef _LINUX_BIGMEM_H -#define _LINUX_BIGMEM_H - -#include - -#ifdef CONFIG_BIGMEM - -#include - -/* declarations for linux/mm/bigmem.c */ -extern unsigned long bigmem_mapnr; -extern int nr_free_bigpages; - -extern struct page * prepare_bigmem_swapout(struct page *); -extern struct page * replace_with_bigmem(struct page *); - -#else /* CONFIG_BIGMEM */ - -#define prepare_bigmem_swapout(page) page -#define replace_with_bigmem(page) page -#define kmap(kaddr, type) kaddr -#define kunmap(vaddr, type) do { } while (0) -#define nr_free_bigpages 0 - -#endif /* CONFIG_BIGMEM */ - -/* when CONFIG_BIGMEM is not set these will be plain clear/copy_page */ -extern inline void clear_bigpage(unsigned long kaddr) -{ - unsigned long vaddr; - - vaddr = kmap(kaddr, KM_WRITE); - clear_page(vaddr); - kunmap(vaddr, KM_WRITE); -} - -extern inline void copy_bigpage(unsigned long to, unsigned long from) -{ - unsigned long vfrom, vto; - - vfrom = kmap(from, KM_READ); - vto = kmap(to, KM_WRITE); - copy_page(vto, vfrom); - kunmap(vfrom, KM_READ); - kunmap(vto, KM_WRITE); -} - 
-#endif /* _LINUX_BIGMEM_H */ diff --git a/include/linux/binfmts.h b/include/linux/binfmts.h index 4f7fe13f775e..31721c101d85 100644 --- a/include/linux/binfmts.h +++ b/include/linux/binfmts.h @@ -18,7 +18,7 @@ */ struct linux_binprm{ char buf[128]; - unsigned long page[MAX_ARG_PAGES]; + struct page *page[MAX_ARG_PAGES]; unsigned long p; /* current top of mem */ int sh_bang; struct dentry * dentry; diff --git a/include/linux/bootmem.h b/include/linux/bootmem.h new file mode 100644 index 000000000000..4b18c7ce2218 --- /dev/null +++ b/include/linux/bootmem.h @@ -0,0 +1,24 @@ +#ifndef _LINUX_BOOTMEM_H +#define _LINUX_BOOTMEM_H + +#include +#include + +/* + * simple boot-time physical memory area allocator. + */ + +extern unsigned long max_low_pfn; + +extern unsigned long __init init_bootmem (unsigned long addr, unsigned long memend); +extern void __init reserve_bootmem (unsigned long addr, unsigned long size); +extern void __init free_bootmem (unsigned long addr, unsigned long size); +extern void * __init __alloc_bootmem (unsigned long size, unsigned long align); +#define alloc_bootmem(x) __alloc_bootmem((x), SMP_CACHE_BYTES) +#define alloc_bootmem_pages(x) __alloc_bootmem((x), PAGE_SIZE) +extern unsigned long __init free_all_bootmem (void); + +#endif /* _LINUX_BOOTMEM_H */ + + + diff --git a/include/linux/fs.h b/include/linux/fs.h index c3dfc01a5c42..c6c7d76d21e1 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -323,6 +323,11 @@ struct iattr { #include #include +/* + * oh the beauties of C type declarations. 
+ */ +struct page; + struct inode { struct list_head i_hash; struct list_head i_list; @@ -350,7 +355,7 @@ struct inode { wait_queue_head_t i_wait; struct file_lock *i_flock; struct vm_area_struct *i_mmap; - struct page *i_pages; + struct list_head i_pages; spinlock_t i_shared_lock; struct dquot *i_dquot[MAXQUOTAS]; struct pipe_inode_info *i_pipe; @@ -769,8 +774,6 @@ extern int fs_may_mount(kdev_t); extern int try_to_free_buffers(struct page *); extern void refile_buffer(struct buffer_head * buf); -extern atomic_t buffermem; - #define BUF_CLEAN 0 #define BUF_LOCKED 1 /* Buffers scheduled for write */ #define BUF_DIRTY 2 /* Dirty buffers, not yet scheduled for write */ @@ -874,7 +877,7 @@ typedef struct { int error; } read_descriptor_t; -typedef int (*read_actor_t)(read_descriptor_t *, const char *, unsigned long); +typedef int (*read_actor_t)(read_descriptor_t *, struct page *, unsigned long, unsigned long); extern struct dentry * lookup_dentry(const char *, struct dentry *, unsigned int); diff --git a/include/linux/highmem.h b/include/linux/highmem.h new file mode 100644 index 000000000000..e0e9e2993b8b --- /dev/null +++ b/include/linux/highmem.h @@ -0,0 +1,77 @@ +#ifndef _LINUX_HIGHMEM_H +#define _LINUX_HIGHMEM_H + +#include +#include + +#ifdef CONFIG_HIGHMEM + +extern struct page *highmem_start_page; + +#include + +/* declarations for linux/mm/highmem.c */ +extern unsigned long highmem_mapnr; +extern unsigned long nr_free_highpages; + +extern struct page * prepare_highmem_swapout(struct page *); +extern struct page * replace_with_highmem(struct page *); + +#else /* CONFIG_HIGHMEM */ + +#define prepare_highmem_swapout(page) page +#define replace_with_highmem(page) page +#define kmap(page, type) page_address(page) +#define kunmap(vaddr, type) do { } while (0) +#define nr_free_highpages 0UL + +#endif /* CONFIG_HIGHMEM */ + +/* when CONFIG_HIGHMEM is not set these will be plain clear/copy_page */ +extern inline void clear_highpage(struct page *page) +{ + unsigned 
long kaddr; + + kaddr = kmap(page, KM_WRITE); + clear_page((void *)kaddr); + kunmap(kaddr, KM_WRITE); +} + +extern inline void memclear_highpage(struct page *page, unsigned int offset, unsigned int size) +{ + unsigned long kaddr; + + if (offset + size > PAGE_SIZE) + BUG(); + kaddr = kmap(page, KM_WRITE); + memset((void *)(kaddr + offset), 0, size); + kunmap(kaddr, KM_WRITE); +} + +/* + * Same but also flushes aliased cache contents to RAM. + */ +extern inline void memclear_highpage_flush(struct page *page, unsigned int offset, unsigned int size) +{ + unsigned long kaddr; + + if (offset + size > PAGE_SIZE) + BUG(); + kaddr = kmap(page, KM_WRITE); + memset((void *)(kaddr + offset), 0, size); + flush_page_to_ram(kaddr); + kunmap(kaddr, KM_WRITE); +} + +extern inline void copy_highpage(struct page *to, struct page *from) +{ + unsigned long vfrom, vto; + + vfrom = kmap(from, KM_READ); + vto = kmap(to, KM_WRITE); + copy_page((void *)vto, (void *)vfrom); + kunmap(vfrom, KM_READ); + kunmap(vto, KM_WRITE); +} + +#endif /* _LINUX_HIGHMEM_H */ diff --git a/include/linux/iobuf.h b/include/linux/iobuf.h index 9418888f2351..420285faf62f 100644 --- a/include/linux/iobuf.h +++ b/include/linux/iobuf.h @@ -41,7 +41,6 @@ struct kiobuf * region, there won't necessarily be page structs defined for * every address. */ - unsigned long * pagelist; struct page ** maplist; unsigned int locked : 1; /* If set, pages has been locked */ diff --git a/include/linux/kernel.h b/include/linux/kernel.h index 89bea4477369..d5b204c2c8a8 100644 --- a/include/linux/kernel.h +++ b/include/linux/kernel.h @@ -94,9 +94,10 @@ struct sysinfo { unsigned long totalswap; /* Total swap space size */ unsigned long freeswap; /* swap space still available */ unsigned short procs; /* Number of current processes */ - unsigned long totalbig; /* Total big memory size */ - unsigned long freebig; /* Available big memory size */ - char _f[20-2*sizeof(long)]; /* Padding: libc5 uses this.. 
*/ + unsigned long totalhigh; /* Total high memory size */ + unsigned long freehigh; /* Available high memory size */ + unsigned int mem_unit; /* Memory unit size in bytes */ + char _f[20-2*sizeof(long)-sizeof(int)]; /* Padding: libc5 uses this.. */ }; #endif diff --git a/include/linux/mm.h b/include/linux/mm.h index c6df59665c95..de8393a2d0f5 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -8,6 +8,7 @@ #include #include +#include extern unsigned long max_mapnr; extern unsigned long num_physpages; @@ -103,9 +104,8 @@ struct vm_operations_struct { void (*protect)(struct vm_area_struct *area, unsigned long, size_t, unsigned int newprot); int (*sync)(struct vm_area_struct *area, unsigned long, size_t, unsigned int flags); void (*advise)(struct vm_area_struct *area, unsigned long, size_t, unsigned int advise); - unsigned long (*nopage)(struct vm_area_struct * area, unsigned long address, int write_access); - unsigned long (*wppage)(struct vm_area_struct * area, unsigned long address, - unsigned long page); + struct page * (*nopage)(struct vm_area_struct * area, unsigned long address, int write_access); + struct page * (*wppage)(struct vm_area_struct * area, unsigned long address, struct page * page); int (*swapout)(struct vm_area_struct *, struct page *); }; @@ -119,8 +119,7 @@ struct vm_operations_struct { */ typedef struct page { /* these must be first (free area handling) */ - struct page *next; - struct page *prev; + struct list_head list; struct inode *inode; unsigned long offset; struct page *next_hash; @@ -149,11 +148,11 @@ typedef struct page { #define PG_uptodate 3 #define PG_decr_after 5 #define PG_DMA 7 -#define PG_Slab 8 +#define PG_slab 8 #define PG_swap_cache 9 #define PG_skip 10 #define PG_swap_entry 11 -#define PG_BIGMEM 12 +#define PG_highmem 12 /* bits 21-30 unused */ #define PG_reserved 31 @@ -183,27 +182,32 @@ if (!test_and_clear_bit(PG_locked, &(page)->flags)) { \ #define PageReferenced(page) (test_bit(PG_referenced, &(page)->flags)) 
#define PageDecrAfter(page) (test_bit(PG_decr_after, &(page)->flags)) #define PageDMA(page) (test_bit(PG_DMA, &(page)->flags)) -#define PageSlab(page) (test_bit(PG_Slab, &(page)->flags)) +#define PageSlab(page) (test_bit(PG_slab, &(page)->flags)) #define PageSwapCache(page) (test_bit(PG_swap_cache, &(page)->flags)) #define PageReserved(page) (test_bit(PG_reserved, &(page)->flags)) -#define PageSetSlab(page) (set_bit(PG_Slab, &(page)->flags)) +#define PageSetSlab(page) (set_bit(PG_slab, &(page)->flags)) #define PageSetSwapCache(page) (set_bit(PG_swap_cache, &(page)->flags)) #define PageTestandSetSwapCache(page) \ (test_and_set_bit(PG_swap_cache, &(page)->flags)) -#define PageClearSlab(page) (clear_bit(PG_Slab, &(page)->flags)) +#define PageClearSlab(page) (clear_bit(PG_slab, &(page)->flags)) #define PageClearSwapCache(page)(clear_bit(PG_swap_cache, &(page)->flags)) #define PageTestandClearSwapCache(page) \ (test_and_clear_bit(PG_swap_cache, &(page)->flags)) -#ifdef CONFIG_BIGMEM -#define PageBIGMEM(page) (test_bit(PG_BIGMEM, &(page)->flags)) +#ifdef CONFIG_HIGHMEM +#define PageHighMem(page) (test_bit(PG_highmem, &(page)->flags)) #else -#define PageBIGMEM(page) 0 /* needed to optimize away at compile time */ +#define PageHighMem(page) 0 /* needed to optimize away at compile time */ #endif +#define SetPageReserved(page) do { set_bit(PG_reserved, &(page)->flags); \ + } while (0) +#define ClearPageReserved(page) do { test_and_clear_bit(PG_reserved, &(page)->flags); } while (0) + + /* * Various page->flags bits: * @@ -224,7 +228,7 @@ if (!test_and_clear_bit(PG_locked, &(page)->flags)) { \ * (e.g. a private data page of one process). * * A page may be used for kmalloc() or anyone else who does a - * get_free_page(). In this case the page->count is at least 1, and + * __get_free_page(). In this case the page->count is at least 1, and * all other fields are unused but should be 0 or NULL. The * management of this page is the responsibility of the one who uses * it. 
@@ -281,20 +285,27 @@ extern mem_map_t * mem_map; * goes to clearing the page. If you want a page without the clearing * overhead, just use __get_free_page() directly.. */ +extern struct page * __get_pages(int gfp_mask, unsigned long order); #define __get_free_page(gfp_mask) __get_free_pages((gfp_mask),0) #define __get_dma_pages(gfp_mask, order) __get_free_pages((gfp_mask) | GFP_DMA,(order)) extern unsigned long FASTCALL(__get_free_pages(int gfp_mask, unsigned long gfp_order)); +extern struct page * get_free_highpage(int gfp_mask); -extern inline unsigned long get_free_page(int gfp_mask) +extern inline unsigned long get_zeroed_page(int gfp_mask) { unsigned long page; page = __get_free_page(gfp_mask); if (page) - clear_page(page); + clear_page((void *)page); return page; } +/* + * The old interface name will be removed in 2.5: + */ +#define get_free_page get_zeroed_page + /* memory.c & swap.c*/ #define free_page(addr) free_pages((addr),0) @@ -302,7 +313,7 @@ extern int FASTCALL(free_pages(unsigned long addr, unsigned long order)); extern int FASTCALL(__free_page(struct page *)); extern void show_free_areas(void); -extern unsigned long put_dirty_page(struct task_struct * tsk,unsigned long page, +extern struct page * put_dirty_page(struct task_struct * tsk, struct page *page, unsigned long address); extern void clear_page_tables(struct mm_struct *, unsigned long, int); @@ -322,12 +333,13 @@ extern int ptrace_writedata(struct task_struct *tsk, char * src, unsigned long d extern int pgt_cache_water[2]; extern int check_pgt_cache(void); -extern unsigned long paging_init(unsigned long start_mem, unsigned long end_mem); -extern void mem_init(unsigned long start_mem, unsigned long end_mem); +extern void paging_init(void); +extern void free_area_init(unsigned long); +extern void mem_init(void); extern void show_mem(void); extern void oom(struct task_struct * tsk); extern void si_meminfo(struct sysinfo * val); -extern void swapin_readahead(unsigned long); +extern void 
swapin_readahead(pte_t); /* mmap.c */ extern void vma_init(void); @@ -359,18 +371,18 @@ extern void put_cached_page(unsigned long); #define __GFP_HIGH 0x08 #define __GFP_IO 0x10 #define __GFP_SWAP 0x20 -#ifdef CONFIG_BIGMEM -#define __GFP_BIGMEM 0x40 +#ifdef CONFIG_HIGHMEM +#define __GFP_HIGHMEM 0x40 #else -#define __GFP_BIGMEM 0x0 /* noop */ +#define __GFP_HIGHMEM 0x0 /* noop */ #endif #define __GFP_DMA 0x80 #define GFP_BUFFER (__GFP_LOW | __GFP_WAIT) #define GFP_ATOMIC (__GFP_HIGH) -#define GFP_BIGUSER (__GFP_LOW | __GFP_WAIT | __GFP_IO | __GFP_BIGMEM) #define GFP_USER (__GFP_LOW | __GFP_WAIT | __GFP_IO) +#define GFP_HIGHUSER (GFP_USER | __GFP_HIGHMEM) #define GFP_KERNEL (__GFP_MED | __GFP_WAIT | __GFP_IO) #define GFP_NFS (__GFP_HIGH | __GFP_WAIT | __GFP_IO) #define GFP_KSWAPD (__GFP_IO | __GFP_SWAP) @@ -380,10 +392,10 @@ extern void put_cached_page(unsigned long); #define GFP_DMA __GFP_DMA -/* Flag - indicates that the buffer can be taken from big memory which is not +/* Flag - indicates that the buffer can be taken from high memory which is not directly addressable by the kernel */ -#define GFP_BIGMEM __GFP_BIGMEM +#define GFP_HIGHMEM __GFP_HIGHMEM /* vma is the first one with address < vma->vm_end, * and even address < vma->vm_start. Have to extend vma. 
*/ @@ -422,7 +434,7 @@ static inline struct vm_area_struct * find_vma_intersection(struct mm_struct * m extern struct vm_area_struct *find_extend_vma(struct task_struct *tsk, unsigned long addr); -#define buffer_under_min() ((atomic_read(&buffermem) >> PAGE_SHIFT) * 100 < \ +#define buffer_under_min() (atomic_read(&buffermem_pages) * 100 < \ buffer_mem.min_percent * num_physpages) #define pgcache_under_min() (atomic_read(&page_cache_size) * 100 < \ page_cache.min_percent * num_physpages) diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h index 0aff25c2908f..6410d3d1e88f 100644 --- a/include/linux/pagemap.h +++ b/include/linux/pagemap.h @@ -11,10 +11,16 @@ #include #include +#include +#include -static inline unsigned long page_address(struct page * page) +extern inline pte_t get_pagecache_pte(struct page *page) { - return PAGE_OFFSET + ((page - mem_map) << PAGE_SHIFT); + /* + * the pagecache is still machineword sized. The rest of the VM + * can deal with arbitrary sized ptes. 
+ */ + return __pte(page->offset); } /* @@ -30,8 +36,8 @@ static inline unsigned long page_address(struct page * page) #define PAGE_CACHE_MASK PAGE_MASK #define PAGE_CACHE_ALIGN(addr) (((addr)+PAGE_CACHE_SIZE-1)&PAGE_CACHE_MASK) -#define page_cache_alloc() __get_free_page(GFP_USER) -#define page_cache_free(x) free_page(x) +#define page_cache_alloc() __get_pages(GFP_USER, 0) +#define page_cache_free(x) __free_page(x) #define page_cache_release(x) __free_page(x) /* @@ -54,7 +60,7 @@ extern void page_cache_init(unsigned long); * inode pointer and offsets are distributed (ie, we * roughly know which bits are "significant") */ -static inline unsigned long _page_hashfn(struct inode * inode, unsigned long offset) +extern inline unsigned long _page_hashfn(struct inode * inode, unsigned long offset) { #define i (((unsigned long) inode)/(sizeof(struct inode) & ~ (sizeof(struct inode) - 1))) #define o (offset >> PAGE_SHIFT) @@ -82,26 +88,37 @@ extern void __add_page_to_hash_queue(struct page * page, struct page **p); extern void add_to_page_cache(struct page * page, struct inode * inode, unsigned long offset); extern int add_to_page_cache_unique(struct page * page, struct inode * inode, unsigned long offset, struct page **hash); -static inline void add_page_to_hash_queue(struct page * page, struct inode * inode, unsigned long offset) +extern inline void add_page_to_hash_queue(struct page * page, struct inode * inode, unsigned long offset) { __add_page_to_hash_queue(page, page_hash(inode,offset)); } -static inline void add_page_to_inode_queue(struct inode * inode, struct page * page) +extern inline void add_page_to_inode_queue(struct inode * inode, struct page * page) { - struct page **p = &inode->i_pages; - - inode->i_nrpages++; + struct list_head *head = &inode->i_pages; + + if (!inode->i_nrpages++) { + if (!list_empty(head)) + BUG(); + } else { + if (list_empty(head)) + BUG(); + } + list_add(&page->list, head); page->inode = inode; - page->prev = NULL; - if ((page->next = 
*p) != NULL) - page->next->prev = page; - *p = page; +} + +extern inline void remove_page_from_inode_queue(struct page * page) +{ + struct inode * inode = page->inode; + + inode->i_nrpages--; + list_del(&page->list); } extern void ___wait_on_page(struct page *); -static inline void wait_on_page(struct page * page) +extern inline void wait_on_page(struct page * page) { if (PageLocked(page)) ___wait_on_page(page); diff --git a/include/linux/sched.h b/include/linux/sched.h index dd5dcf2c8b4c..81ec83c273bb 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -426,7 +426,7 @@ struct task_struct { /* files */ &init_files, \ /* mm */ NULL, &init_mm, \ /* signals */ SPIN_LOCK_UNLOCKED, &init_signals, {{0}}, {{0}}, NULL, &init_task.sigqueue, 0, 0, \ -/* exec cts */ 0,0,0, \ +/* exec cts */ 0,0, \ } #ifndef INIT_TASK_SIZE diff --git a/include/linux/shm.h b/include/linux/shm.h index d837873123d1..6ba237e923da 100644 --- a/include/linux/shm.h +++ b/include/linux/shm.h @@ -24,7 +24,7 @@ struct shmid_kernel struct shmid_ds u; /* the following are private */ unsigned long shm_npages; /* size of segment (pages) */ - unsigned long *shm_pages; /* array of ptrs to frames -> SHMMAX */ + pte_t *shm_pages; /* array of ptrs to frames -> SHMMAX */ struct vm_area_struct *attaches; /* descriptors for attaches */ }; @@ -72,7 +72,7 @@ asmlinkage long sys_shmget (key_t key, int size, int flag); asmlinkage long sys_shmat (int shmid, char *shmaddr, int shmflg, unsigned long *addr); asmlinkage long sys_shmdt (char *shmaddr); asmlinkage long sys_shmctl (int shmid, int cmd, struct shmid_ds *buf); -extern void shm_unuse(unsigned long entry, unsigned long page); +extern void shm_unuse(pte_t entry, struct page *page); #endif /* __KERNEL__ */ diff --git a/include/linux/slab.h b/include/linux/slab.h index 3097a8db26f2..fa344d816a17 100644 --- a/include/linux/slab.h +++ b/include/linux/slab.h @@ -45,7 +45,7 @@ typedef struct kmem_cache_s kmem_cache_t; #define SLAB_CTOR_VERIFY 0x004UL /* tell 
constructor it's a verify call */ /* prototypes */ -extern long kmem_cache_init(long, long); +extern void kmem_cache_init(void); extern void kmem_cache_sizes_init(void); extern kmem_cache_t *kmem_find_general_cachep(size_t); extern kmem_cache_t *kmem_cache_create(const char *, size_t, size_t, unsigned long, diff --git a/include/linux/swap.h b/include/linux/swap.h index 0b0baf1e8feb..7030b788d395 100644 --- a/include/linux/swap.h +++ b/include/linux/swap.h @@ -35,8 +35,6 @@ union swap_header { #define MAX_SWAP_BADPAGES \ ((__swapoffset(magic.magic) - __swapoffset(info.badpages)) / sizeof(int)) -#undef DEBUG_SWAP - #include #define SWP_USED 1 @@ -69,7 +67,7 @@ extern struct list_head lru_cache; extern atomic_t nr_async_pages; extern struct inode swapper_inode; extern atomic_t page_cache_size; -extern atomic_t buffermem; +extern atomic_t buffermem_pages; /* Incomplete types for prototype declarations: */ struct task_struct; @@ -87,36 +85,35 @@ extern int try_to_free_pages(unsigned int gfp_mask); /* linux/mm/page_io.c */ extern void rw_swap_page(int, struct page *, int); -extern void rw_swap_page_nolock(int, unsigned long, char *, int); -extern void swap_after_unlock_page (unsigned long entry); +extern void rw_swap_page_nolock(int, pte_t, char *, int); /* linux/mm/page_alloc.c */ /* linux/mm/swap_state.c */ extern void show_swap_cache_info(void); -extern void add_to_swap_cache(struct page *, unsigned long); -extern int swap_duplicate(unsigned long); +extern void add_to_swap_cache(struct page *, pte_t); +extern int swap_duplicate(pte_t); extern int swap_check_entry(unsigned long); -struct page * lookup_swap_cache(unsigned long); -extern struct page * read_swap_cache_async(unsigned long, int); +struct page * lookup_swap_cache(pte_t); +extern struct page * read_swap_cache_async(pte_t, int); #define read_swap_cache(entry) read_swap_cache_async(entry, 1); -extern int FASTCALL(swap_count(unsigned long)); -extern unsigned long acquire_swap_entry(struct page *page); +extern 
int swap_count(struct page *); +extern pte_t acquire_swap_entry(struct page *page); /* * Make these inline later once they are working properly. */ extern void __delete_from_swap_cache(struct page *page); extern void delete_from_swap_cache(struct page *page); -extern void free_page_and_swap_cache(unsigned long addr); +extern void free_page_and_swap_cache(struct page *page); /* linux/mm/swapfile.c */ extern unsigned int nr_swapfiles; extern struct swap_info_struct swap_info[]; extern int is_swap_partition(kdev_t); void si_swapinfo(struct sysinfo *); -unsigned long get_swap_page(void); -extern void FASTCALL(swap_free(unsigned long)); +pte_t get_swap_page(void); +extern void swap_free(pte_t); struct swap_list_t { int head; /* head of priority-ordered swapfile list */ int next; /* swapfile to be used next */ @@ -158,7 +155,7 @@ static inline int is_page_shared(struct page *page) return 1; count = page_count(page); if (PageSwapCache(page)) - count += swap_count(page->offset) - 2; + count += swap_count(page) - 2; return count > 1; } diff --git a/include/linux/tty.h b/include/linux/tty.h index 8d6db96e5419..b228797f7b17 100644 --- a/include/linux/tty.h +++ b/include/linux/tty.h @@ -339,12 +339,13 @@ extern int fg_console, last_console, want_console; extern int kmsg_redirect; -extern unsigned long con_init(unsigned long); +extern void con_init(void); +extern void console_init(void); extern int rs_init(void); extern int lp_init(void); extern int pty_init(void); -extern int tty_init(void); +extern void tty_init(void); extern int ip2_init(void); extern int pcxe_init(void); extern int pc_init(void); @@ -393,7 +394,7 @@ extern int n_tty_ioctl(struct tty_struct * tty, struct file * file, /* serial.c */ -extern long serial_console_init(long kmem_start, long kmem_end); +extern void serial_console_init(void); /* pcxx.c */ diff --git a/init/main.c b/init/main.c index f0d17e56c534..c81acaeaa8ea 100644 --- a/init/main.c +++ b/init/main.c @@ -24,6 +24,7 @@ #include #include #include 
+#include #include #include @@ -79,7 +80,6 @@ static int init(void *); extern void init_IRQ(void); extern void init_modules(void); -extern long console_init(long, long); extern void sock_init(void); extern void fork_init(unsigned long); extern void mca_init(void); @@ -110,9 +110,6 @@ extern void dquot_init_hash(void); extern void time_init(void); -static unsigned long memory_start = 0; -static unsigned long memory_end = 0; - int rows, cols; #ifdef CONFIG_BLK_DEV_INITRD @@ -423,7 +420,7 @@ static void __init parse_options(char *line) } -extern void setup_arch(char **, unsigned long *, unsigned long *); +extern void setup_arch(char **); extern void cpu_idle(void); #ifndef __SMP__ @@ -450,15 +447,15 @@ static void __init smp_init(void) asmlinkage void __init start_kernel(void) { char * command_line; - + unsigned long mempages; /* * Interrupts are still disabled. Do necessary setups, then * enable them */ lock_kernel(); printk(linux_banner); - setup_arch(&command_line, &memory_start, &memory_end); - memory_start = paging_init(memory_start,memory_end); + setup_arch(&command_line); + paging_init(); trap_init(); init_IRQ(); sched_init(); @@ -470,40 +467,45 @@ asmlinkage void __init start_kernel(void) * we've done PCI setups etc, and console_init() must be aware of * this. But we do want output early, in case something goes wrong. 
*/ - memory_start = console_init(memory_start,memory_end); + console_init(); #ifdef CONFIG_MODULES init_modules(); #endif if (prof_shift) { - prof_buffer = (unsigned int *) memory_start; + unsigned int size; /* only text is profiled */ prof_len = (unsigned long) &_etext - (unsigned long) &_stext; prof_len >>= prof_shift; - memory_start += prof_len * sizeof(unsigned int); - memset(prof_buffer, 0, prof_len * sizeof(unsigned int)); + + size = prof_len * sizeof(unsigned int) + PAGE_SIZE-1; + prof_buffer = (unsigned int *) alloc_bootmem(size); + memset(prof_buffer, 0, size); } - memory_start = kmem_cache_init(memory_start, memory_end); + kmem_cache_init(); sti(); calibrate_delay(); #ifdef CONFIG_BLK_DEV_INITRD + // FIXME, use the bootmem.h interface. if (initrd_start && !initrd_below_start_ok && initrd_start < memory_start) { printk(KERN_CRIT "initrd overwritten (0x%08lx < 0x%08lx) - " "disabling it.\n",initrd_start,memory_start); initrd_start = 0; } #endif - mem_init(memory_start,memory_end); + mem_init(); kmem_cache_sizes_init(); #ifdef CONFIG_PROC_FS proc_root_init(); #endif - fork_init(memory_end-memory_start); + mempages = num_physpages; + + fork_init(mempages); filescache_init(); dcache_init(); vma_init(); - buffer_init(memory_end-memory_start); - page_cache_init(memory_end-memory_start); + buffer_init(mempages); + page_cache_init(mempages); kiobuf_init(); signals_init(); inode_init(); diff --git a/ipc/shm.c b/ipc/shm.c index 71a2b4eb779c..653634ca81e8 100644 --- a/ipc/shm.c +++ b/ipc/shm.c @@ -1,13 +1,14 @@ /* * linux/ipc/shm.c * Copyright (C) 1992, 1993 Krishna Balasubramanian - * Many improvements/fixes by Bruno Haible. + * Many improvements/fixes by Bruno Haible. * Replaced `struct shm_desc' by `struct vm_area_struct', July 1994. * Fixed the shm swap deallocation (shm_unuse()), August 1998 Andrea Arcangeli. 
* * /proc/sysvipc/shm support (c) 1999 Dragos Acostachioaie * BIGMEM support, Andrea Arcangeli * SMP thread shm, Jean-Luc Boyard + * HIGHMEM support, Ingo Molnar */ #include @@ -19,20 +20,19 @@ #include #include #include -#include +#include #include #include extern int ipcperms (struct ipc_perm *ipcp, short shmflg); -extern unsigned long get_swap_page (void); static int findkey (key_t key); static int newseg (key_t key, int shmflg, int size); static int shm_map (struct vm_area_struct *shmd); static void killseg (int id); static void shm_open (struct vm_area_struct *shmd); static void shm_close (struct vm_area_struct *shmd); -static unsigned long shm_nopage(struct vm_area_struct *, unsigned long, int); +static struct page * shm_nopage(struct vm_area_struct *, unsigned long, int); static int shm_swapout(struct vm_area_struct *, struct page *); #ifdef CONFIG_PROC_FS static int sysvipc_shm_read_proc(char *buffer, char **start, off_t offset, int length, int *eof, void *data); @@ -108,7 +108,7 @@ static int newseg (key_t key, int shmflg, int size) { struct shmid_kernel *shp; int numpages = (size + PAGE_SIZE -1) >> PAGE_SHIFT; - int id, i; + int id; if (size < SHMMIN) return -EINVAL; @@ -131,7 +131,7 @@ found: return -ENOMEM; } lock_kernel(); - shp->shm_pages = (ulong *) vmalloc (numpages*sizeof(ulong)); + shp->shm_pages = (pte_t *) vmalloc (numpages*sizeof(pte_t)); unlock_kernel(); if (!shp->shm_pages) { kfree(shp); @@ -141,7 +141,8 @@ found: return -ENOMEM; } - for (i = 0; i < numpages; shp->shm_pages[i++] = 0); + memset(shp->shm_pages, 0, numpages*sizeof(pte_t)); + shp->u.shm_perm.key = key; shp->u.shm_perm.mode = (shmflg & S_IRWXUGO); shp->u.shm_perm.cuid = shp->u.shm_perm.uid = current->euid; @@ -214,33 +215,29 @@ static void killseg (int id) int rss, swp; shp = shm_segs[id]; - if (shp == IPC_NOID || shp == IPC_UNUSED) { - printk ("shm nono: killseg called on unused seg id=%d\n", id); - return; - } + if (shp == IPC_NOID || shp == IPC_UNUSED) + BUG(); 
shp->u.shm_perm.seq++; /* for shmat */ shm_seq = (shm_seq+1) % ((unsigned)(1<<31)/SHMMNI); /* increment, but avoid overflow */ shm_segs[id] = (struct shmid_kernel *) IPC_UNUSED; used_segs--; if (id == max_shmid) while (max_shmid && (shm_segs[--max_shmid] == IPC_UNUSED)); - if (!shp->shm_pages) { - printk ("shm nono: killseg shp->pages=NULL. id=%d\n", id); - return; - } + if (!shp->shm_pages) + BUG(); spin_unlock(&shm_lock); numpages = shp->shm_npages; for (i = 0, rss = 0, swp = 0; i < numpages ; i++) { pte_t pte; - pte = __pte(shp->shm_pages[i]); + pte = shp->shm_pages[i]; if (pte_none(pte)) continue; if (pte_present(pte)) { - free_page (pte_page(pte)); + __free_page (pte_page(pte)); rss++; } else { lock_kernel(); - swap_free(pte_val(pte)); + swap_free(pte); unlock_kernel(); swp++; } @@ -484,16 +481,12 @@ asmlinkage long sys_shmat (int shmid, char *shmaddr, int shmflg, ulong *raddr) down(&current->mm->mmap_sem); spin_lock(&shm_lock); - if (shmid < 0) { - /* printk("shmat() -> EINVAL because shmid = %d < 0\n",shmid); */ + if (shmid < 0) goto out; - } shp = shm_segs[id = (unsigned int) shmid % SHMMNI]; - if (shp == IPC_UNUSED || shp == IPC_NOID) { - /* printk("shmat() -> EINVAL because shmid = %d is invalid\n",shmid); */ + if (shp == IPC_UNUSED || shp == IPC_NOID) goto out; - } if (!(addr = (ulong) shmaddr)) { if (shmflg & SHM_REMAP) @@ -526,16 +519,9 @@ asmlinkage long sys_shmat (int shmid, char *shmaddr, int shmflg, ulong *raddr) */ if (addr < current->mm->start_stack && addr > current->mm->start_stack - PAGE_SIZE*(shp->shm_npages + 4)) - { - /* printk("shmat() -> EINVAL because segment intersects stack\n"); */ goto out; - } - if (!(shmflg & SHM_REMAP)) - if ((shmd = find_vma_intersection(current->mm, addr, addr + shp->u.shm_segsz))) { - /* printk("shmat() -> EINVAL because the interval [0x%lx,0x%lx) intersects an already mapped interval [0x%lx,0x%lx).\n", - addr, addr + shp->shm_segsz, shmd->vm_start, shmd->vm_end); */ - goto out; - } + if (!(shmflg & SHM_REMAP) && 
find_vma_intersection(current->mm, addr, addr + shp->u.shm_segsz)) + goto out; err = -EACCES; if (ipcperms(&shp->u.shm_perm, shmflg & SHM_RDONLY ? S_IRUGO : S_IRUGO|S_IWUGO)) @@ -568,7 +554,7 @@ asmlinkage long sys_shmat (int shmid, char *shmaddr, int shmflg, ulong *raddr) shmd->vm_offset = 0; shmd->vm_ops = &shm_vm_ops; - shp->u.shm_nattch++; /* prevent destruction */ + shp->u.shm_nattch++; /* prevent destruction */ spin_unlock(&shm_lock); err = shm_map (shmd); spin_lock(&shm_lock); @@ -668,86 +654,76 @@ static int shm_swapout(struct vm_area_struct * vma, struct page * page) /* * page not present ... go through shm_pages */ -static unsigned long shm_nopage(struct vm_area_struct * shmd, unsigned long address, int no_share) +static struct page * shm_nopage(struct vm_area_struct * shmd, unsigned long address, int no_share) { pte_t pte; struct shmid_kernel *shp; unsigned int idx; - unsigned long page; - struct page * page_map; + struct page * page; shp = *(struct shmid_kernel **) shmd->vm_private_data; idx = (address - shmd->vm_start + shmd->vm_offset) >> PAGE_SHIFT; -#ifdef DEBUG_SHM - if (shp == IPC_UNUSED || shp == IPC_NOID) { - printk ("shm_nopage: id=%d invalid. Race.\n", id); - return 0; - } - if (idx >= shp->shm_npages) { - printk ("shm_nopage : too large page index. 
id=%d\n", id); - return 0; - } -#endif - spin_lock(&shm_lock); - again: - pte = __pte(shp->shm_pages[idx]); +again: + pte = shp->shm_pages[idx]; if (!pte_present(pte)) { if (pte_none(pte)) { spin_unlock(&shm_lock); - page = __get_free_page(GFP_BIGUSER); + page = get_free_highpage(GFP_HIGHUSER); if (!page) goto oom; - clear_bigpage(page); + clear_highpage(page); spin_lock(&shm_lock); - if (pte_val(pte) != shp->shm_pages[idx]) + if (pte_val(pte) != pte_val(shp->shm_pages[idx])) goto changed; } else { - unsigned long entry = pte_val(pte); + pte_t entry = pte; spin_unlock(&shm_lock); - page_map = lookup_swap_cache(entry); - if (!page_map) { + BUG(); + page = lookup_swap_cache(entry); + if (!page) { lock_kernel(); swapin_readahead(entry); - page_map = read_swap_cache(entry); + page = read_swap_cache(entry); unlock_kernel(); - if (!page_map) + if (!page) goto oom; } - delete_from_swap_cache(page_map); - page_map = replace_with_bigmem(page_map); - page = page_address(page_map); + delete_from_swap_cache(page); + page = replace_with_highmem(page); lock_kernel(); swap_free(entry); unlock_kernel(); spin_lock(&shm_lock); shm_swp--; - pte = __pte(shp->shm_pages[idx]); + pte = shp->shm_pages[idx]; if (pte_present(pte)) goto present; } shm_rss++; pte = pte_mkdirty(mk_pte(page, PAGE_SHARED)); - shp->shm_pages[idx] = pte_val(pte); + shp->shm_pages[idx] = pte; } else --current->maj_flt; /* was incremented in do_no_page */ -done: /* pte_val(pte) == shp->shm_pages[idx] */ - get_page(mem_map + MAP_NR(pte_page(pte))); +done: + /* pte_val(pte) == shp->shm_pages[idx] */ + get_page(pte_page(pte)); spin_unlock(&shm_lock); current->min_flt++; return pte_page(pte); changed: - free_page(page); + __free_page(page); goto again; present: - free_page(page); + if (page) + free_page_and_swap_cache(page); goto done; oom: - return -1; + return (struct page *)(-1); } /* @@ -760,7 +736,7 @@ int shm_swap (int prio, int gfp_mask) { pte_t page; struct shmid_kernel *shp; - unsigned long swap_nr; + pte_t 
swap_entry; unsigned long id, idx; int loop = 0; int counter; @@ -768,7 +744,7 @@ int shm_swap (int prio, int gfp_mask) counter = shm_rss >> prio; lock_kernel(); - if (!counter || !(swap_nr = get_swap_page())) { + if (!counter || !pte_val(swap_entry = get_swap_page())) { unlock_kernel(); return 0; } @@ -795,36 +771,37 @@ int shm_swap (int prio, int gfp_mask) if (idx >= shp->shm_npages) goto next_id; - page = __pte(shp->shm_pages[idx]); + page = shp->shm_pages[idx]; if (!pte_present(page)) goto check_table; - page_map = &mem_map[MAP_NR(pte_page(page))]; + page_map = pte_page(page); if ((gfp_mask & __GFP_DMA) && !PageDMA(page_map)) goto check_table; - if (!(gfp_mask & __GFP_BIGMEM) && PageBIGMEM(page_map)) + if (!(gfp_mask & __GFP_HIGHMEM) && PageHighMem(page_map)) goto check_table; swap_attempts++; if (--counter < 0) { /* failed */ - failed: +failed: spin_unlock(&shm_lock); lock_kernel(); - swap_free (swap_nr); + swap_free(swap_entry); unlock_kernel(); return 0; } - if (page_count(mem_map + MAP_NR(pte_page(page))) != 1) + if (page_count(page_map)) goto check_table; - if (!(page_map = prepare_bigmem_swapout(page_map))) + if (!(page_map = prepare_highmem_swapout(page_map))) goto check_table; - shp->shm_pages[idx] = swap_nr; + shp->shm_pages[idx] = swap_entry; swap_successes++; shm_swp++; shm_rss--; spin_unlock(&shm_lock); + lock_kernel(); - swap_duplicate(swap_nr); - add_to_swap_cache(page_map, swap_nr); + swap_duplicate(swap_entry); + add_to_swap_cache(page_map, swap_entry); rw_swap_page(WRITE, page_map, 0); unlock_kernel(); @@ -836,13 +813,13 @@ int shm_swap (int prio, int gfp_mask) * Free the swap entry and set the new pte for the shm page. 
*/ static void shm_unuse_page(struct shmid_kernel *shp, unsigned long idx, - unsigned long page, unsigned long entry) + pte_t entry, struct page *page) { pte_t pte; pte = pte_mkdirty(mk_pte(page, PAGE_SHARED)); - shp->shm_pages[idx] = pte_val(pte); - get_page(mem_map + MAP_NR(page)); + shp->shm_pages[idx] = pte; + get_page(page); shm_rss++; shm_swp--; @@ -856,20 +833,21 @@ static void shm_unuse_page(struct shmid_kernel *shp, unsigned long idx, /* * unuse_shm() search for an eventually swapped out shm page. */ -void shm_unuse(unsigned long entry, unsigned long page) +void shm_unuse(pte_t entry, struct page *page) { int i, n; spin_lock(&shm_lock); - for (i = 0; i < SHMMNI; i++) - if (shm_segs[i] != IPC_UNUSED && shm_segs[i] != IPC_NOID) - for (n = 0; n < shm_segs[i]->shm_npages; n++) - if (shm_segs[i]->shm_pages[n] == entry) - { - shm_unuse_page(shm_segs[i], n, - page, entry); - return; - } + for (i = 0; i < SHMMNI; i++) { + struct shmid_kernel *seg = shm_segs[i]; + if ((seg == IPC_UNUSED) || (seg == IPC_NOID)) + continue; + for (n = 0; n < seg->shm_npages; n++) + if (pte_val(seg->shm_pages[n]) == pte_val(entry)) { + shm_unuse_page(seg, n, entry, page); + return; + } + } spin_unlock(&shm_lock); } diff --git a/kernel/fork.c b/kernel/fork.c index 37794d726ee3..9f3d9f5077af 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -157,7 +157,7 @@ int alloc_uid(struct task_struct *p) return 0; } -void __init fork_init(unsigned long memsize) +void __init fork_init(unsigned long mempages) { int i; @@ -175,7 +175,7 @@ void __init fork_init(unsigned long memsize) * value: the thread structures can take up at most half * of memory. 
*/ - max_threads = memsize / THREAD_SIZE / 2; + max_threads = mempages / (THREAD_SIZE/PAGE_SIZE) / 2; init_task.rlim[RLIMIT_NPROC].rlim_cur = max_threads/2; init_task.rlim[RLIMIT_NPROC].rlim_max = max_threads/2; diff --git a/kernel/printk.c b/kernel/printk.c index 330ce3efebc4..843bcaeb87db 100644 --- a/kernel/printk.c +++ b/kernel/printk.c @@ -22,7 +22,7 @@ #include -#define LOG_BUF_LEN (16384) +#define LOG_BUF_LEN (16384*16) #define LOG_BUF_MASK (LOG_BUF_LEN-1) static char buf[1024]; diff --git a/kernel/ptrace.c b/kernel/ptrace.c index 4327f9d1e1a5..6d14da625447 100644 --- a/kernel/ptrace.c +++ b/kernel/ptrace.c @@ -10,7 +10,7 @@ #include #include #include -#include +#include #include #include @@ -23,7 +23,9 @@ static int access_one_page(struct task_struct * tsk, struct vm_area_struct * vma pgd_t * pgdir; pmd_t * pgmiddle; pte_t * pgtable; - unsigned long page; + unsigned long mapnr; + unsigned long maddr; + struct page *page; repeat: pgdir = pgd_offset(vma->vm_mm, addr); @@ -39,27 +41,25 @@ repeat: pgtable = pte_offset(pgmiddle, addr); if (!pte_present(*pgtable)) goto fault_in_page; - page = pte_page(*pgtable); + mapnr = pte_pagenr(*pgtable); if (write && (!pte_write(*pgtable) || !pte_dirty(*pgtable))) goto fault_in_page; - if (MAP_NR(page) >= max_mapnr) + if (mapnr >= max_mapnr) return 0; + page = mem_map + mapnr; flush_cache_page(vma, addr); - { - void *src = (void *) (page + (addr & ~PAGE_MASK)); - void *dst = buf; - if (write) { - dst = src; - src = buf; - } - src = (void *) kmap((unsigned long) src, KM_READ); - dst = (void *) kmap((unsigned long) dst, KM_WRITE); - memcpy(dst, src, len); - kunmap((unsigned long) src, KM_READ); - kunmap((unsigned long) dst, KM_WRITE); + if (write) { + maddr = kmap(page, KM_WRITE); + memcpy((char *)maddr + (addr & ~PAGE_MASK), buf, len); + flush_page_to_ram(maddr); + kunmap(maddr, KM_WRITE); + } else { + maddr = kmap(page, KM_READ); + memcpy(buf, (char *)maddr + (addr & ~PAGE_MASK), len); + flush_page_to_ram(maddr); + 
kunmap(maddr, KM_READ); } - flush_page_to_ram(page); return len; fault_in_page: @@ -69,11 +69,11 @@ fault_in_page: return 0; bad_pgd: - printk("ptrace: bad pgd in '%s' at %08lx (%08lx)\n", tsk->comm, addr, pgd_val(*pgdir)); + pgd_ERROR(*pgdir); return 0; bad_pmd: - printk("ptrace: bad pmd in '%s' at %08lx (%08lx)\n", tsk->comm, addr, pmd_val(*pgmiddle)); + pmd_ERROR(*pgmiddle); return 0; } diff --git a/mm/Makefile b/mm/Makefile index 68404aa6794c..31c1a6231644 100644 --- a/mm/Makefile +++ b/mm/Makefile @@ -9,11 +9,11 @@ O_TARGET := mm.o O_OBJS := memory.o mmap.o filemap.o mprotect.o mlock.o mremap.o \ - vmalloc.o slab.o \ - swap.o vmscan.o page_io.o page_alloc.o swap_state.o swapfile.o + vmalloc.o slab.o bootmem.o swap.o vmscan.o page_io.o \ + page_alloc.o swap_state.o swapfile.o -ifeq ($(CONFIG_BIGMEM),y) -O_OBJS += bigmem.o +ifeq ($(CONFIG_HIGHMEM),y) +O_OBJS += highmem.o endif include $(TOPDIR)/Rules.make diff --git a/mm/bigmem.c b/mm/bigmem.c deleted file mode 100644 index af63e860cd0b..000000000000 --- a/mm/bigmem.c +++ /dev/null @@ -1,71 +0,0 @@ -/* - * BIGMEM common code and variables. - * - * (C) 1999 Andrea Arcangeli, SuSE GmbH, andrea@suse.de - * Gerhard Wichert, Siemens AG, Gerhard.Wichert@pdb.siemens.de - */ - -#include -#include -#include - -unsigned long bigmem_mapnr; -int nr_free_bigpages = 0; - -struct page * prepare_bigmem_swapout(struct page * page) -{ - /* if this is a bigmem page so it can't be swapped out directly - otherwise the b_data buffer addresses will break - the lowlevel device drivers. */ - if (PageBIGMEM(page)) { - unsigned long regular_page; - unsigned long vaddr; - - regular_page = __get_free_page(GFP_ATOMIC); - if (!regular_page) - return NULL; - - vaddr = kmap(page_address(page), KM_READ); - copy_page(regular_page, vaddr); - kunmap(vaddr, KM_READ); - - /* ok, we can just forget about our bigmem page since - we stored its data into the new regular_page. 
*/ - __free_page(page); - - page = MAP_NR(regular_page) + mem_map; - } - return page; -} - -struct page * replace_with_bigmem(struct page * page) -{ - if (!PageBIGMEM(page) && nr_free_bigpages) { - unsigned long kaddr; - - kaddr = __get_free_page(GFP_ATOMIC|GFP_BIGMEM); - if (kaddr) { - struct page * bigmem_page; - - bigmem_page = MAP_NR(kaddr) + mem_map; - if (PageBIGMEM(bigmem_page)) { - unsigned long vaddr; - - vaddr = kmap(kaddr, KM_WRITE); - copy_page(vaddr, page_address(page)); - kunmap(vaddr, KM_WRITE); - - /* Preserve the caching of the swap_entry. */ - bigmem_page->offset = page->offset; - - /* We can just forget the old page since - we stored its data into the new - bigmem_page. */ - __free_page(page); - - page = bigmem_page; - } - } - } - return page; -} diff --git a/mm/bootmem.c b/mm/bootmem.c new file mode 100644 index 000000000000..07dadb98184d --- /dev/null +++ b/mm/bootmem.c @@ -0,0 +1,217 @@ +/* + * linux/mm/bootmem.c + * + * Copyright (C) 1999 Ingo Molnar + * + * simple boot-time physical memory area allocator and + * free memory collector. It's used to deal with reserved + * system memory and memory holes as well. + */ + +#include +#include +#include +#include +#include +#include +#include +#include + +/* + * Pointer to a bitmap - the bits represent all physical memory pages + * from physical address 0 to physical address end_mem. + * + * Access to this subsystem has to be serialized externally. (this is + * true for the boot process anyway) + */ +static void * bootmem_map = NULL; +unsigned long max_low_pfn; + +/* + * Called once to set up the allocator itself. + */ +unsigned long __init init_bootmem (unsigned long start, unsigned long pages) +{ + unsigned long mapsize = (pages+7)/8; + + if (bootmem_map) + BUG(); + bootmem_map = __va(start << PAGE_SHIFT); + max_low_pfn = pages; + + /* + * Initially all pages are reserved - setup_arch() has to + * register free RAM areas explicitly. 
+ */ + memset(bootmem_map, 0xff, mapsize); + + return mapsize; +} + +/* + * Marks a particular physical memory range as unallocatable. Usable RAM + * might be used for boot-time allocations - or it might get added + * to the free page pool later on. + */ +void __init reserve_bootmem (unsigned long addr, unsigned long size) +{ + unsigned long i; + /* + * round up, partially reserved pages are considered + * fully reserved. + */ + unsigned long end = (addr + size + PAGE_SIZE-1)/PAGE_SIZE; + + if (!bootmem_map) BUG(); + if (!size) BUG(); + + if (end > max_low_pfn) + BUG(); + for (i = addr/PAGE_SIZE; i < end; i++) + if (test_and_set_bit(i, bootmem_map)) + BUG(); +} + +void __init free_bootmem (unsigned long addr, unsigned long size) +{ + unsigned long i; + /* + * round down end of usable mem, partially free pages are + * considered reserved. + */ + unsigned long end = (addr + size)/PAGE_SIZE; + + if (!bootmem_map) BUG(); + if (!size) BUG(); + + if (end > max_low_pfn) + BUG(); + for (i = addr/PAGE_SIZE; i < end; i++) { + if (!test_and_clear_bit(i, bootmem_map)) + BUG(); + } +} + +/* + * We 'merge' subsequent allocations to save space. We might 'lose' + * some fraction of a page if allocations cannot be satisfied due to + * size constraints on boxes where there is physical RAM space + * fragmentation - in these cases (mostly large memory boxes) this + * is not a problem. + * + * On low memory boxes we get it right in 100% of the cases. + */ +static unsigned long last_pos = 0; +static unsigned long last_offset = 0; + +/* + * alignment has to be a power of 2 value.
+ */ +void * __init __alloc_bootmem (unsigned long size, unsigned long align) +{ + int area = 0; + unsigned long i, start = 0, reserved; + void *ret; + unsigned long offset, remaining_size; + unsigned long areasize; + + if (!bootmem_map) BUG(); + if (!size) BUG(); + + areasize = (size+PAGE_SIZE-1)/PAGE_SIZE; + + for (i = 0; i < max_low_pfn; i++) { + reserved = test_bit(i, bootmem_map); + if (!reserved) { + if (!area) { + area = 1; + start = i; + } + if (i - start + 1 == areasize) + goto found; + } else { + area = 0; + start = -1; + } + } + BUG(); +found: + if (start >= max_low_pfn) + BUG(); + + /* + * Is the next page of the previous allocation-end the start + * of this allocation's buffer? If yes then we can 'merge' + * the previous partial page with this allocation. + */ + if (last_offset && (last_pos+1 == start)) { + offset = (last_offset+align-1) & ~(align-1); + if (offset > PAGE_SIZE) + BUG(); + remaining_size = PAGE_SIZE-offset; + if (remaining_size > PAGE_SIZE) + BUG(); + if (size < remaining_size) { + areasize = 0; + // last_pos unchanged + last_offset = offset+size; + ret = __va(last_pos*PAGE_SIZE + offset); + } else { + size -= remaining_size; + areasize = (size+PAGE_SIZE-1)/PAGE_SIZE; + ret = __va(last_pos*PAGE_SIZE + offset); + last_pos = start+areasize-1; + last_offset = size; + } + last_offset &= ~PAGE_MASK; + } else { + last_pos = start + areasize - 1; + last_offset = size & ~PAGE_MASK; + ret = __va(start * PAGE_SIZE); + } + /* + * Reserve the area now: + */ + for (i = start; i < start+areasize; i++) + if (test_and_set_bit(i, bootmem_map)) + BUG(); + + return ret; +} + +unsigned long __init free_all_bootmem (void) +{ + struct page * page; + unsigned long i, count, total = 0; + + if (!bootmem_map) BUG(); + + printk("freeing all bootmem().\n"); + page = mem_map; + count = 0; + for (i = 0; i < max_low_pfn; i++, page++) { + if (!test_bit(i, bootmem_map)) { + count++; + ClearPageReserved(page); + set_page_count(page, 1); + __free_page(page); + } + } + 
total += count; + /* + * Now free the allocator bitmap itself, it's not + * needed anymore: + */ + page = mem_map + MAP_NR(bootmem_map); + count = 0; + for (i = 0; i < (max_low_pfn/8 + PAGE_SIZE-1)/PAGE_SIZE; i++,page++) { + count++; + ClearPageReserved(page); + set_page_count(page, 1); + __free_page(page); + } + total += count; + bootmem_map = NULL; + + return total; +} diff --git a/mm/filemap.c b/mm/filemap.c index 51624abcfc51..6fdd065a7fad 100644 --- a/mm/filemap.c +++ b/mm/filemap.c @@ -21,6 +21,7 @@ #include #include #include +#include #include #include @@ -75,24 +76,6 @@ static void remove_page_from_hash_queue(struct page * page) atomic_dec(&page_cache_size); } -static void remove_page_from_inode_queue(struct page * page) -{ - struct inode * inode = page->inode; - struct page *prev, *next; - - inode->i_nrpages--; - next = page->next; - prev = page->prev; - if (inode->i_pages == page) - inode->i_pages = next; - if (next) - next->prev = prev; - if (prev) - prev->next = next; - page->next = NULL; - page->prev = NULL; -} - /* * Remove a page from the page cache and free it. 
Caller has to make * sure the page is locked and that nobody else uses it - or that usage @@ -112,13 +95,17 @@ void remove_inode_page(struct page *page) void invalidate_inode_pages(struct inode * inode) { - struct page ** p; + struct list_head *head, *curr; struct page * page; + head = &inode->i_pages; repeat: spin_lock(&pagecache_lock); - p = &inode->i_pages; - while ((page = *p) != NULL) { + curr = head->next; + + while (curr != head) { + page = list_entry(curr, struct page, list); + curr = curr->next; get_page(page); if (TryLockPage(page)) { spin_unlock(&pagecache_lock); @@ -136,7 +123,6 @@ repeat: UnlockPage(page); page_cache_release(page); page_cache_release(page); - } spin_unlock(&pagecache_lock); } @@ -146,15 +132,21 @@ repeat: */ void truncate_inode_pages(struct inode * inode, unsigned long start) { - struct page ** p; + struct list_head *head, *curr; + unsigned long offset; struct page * page; int partial = 0; repeat: + head = &inode->i_pages; spin_lock(&pagecache_lock); - p = &inode->i_pages; - while ((page = *p) != NULL) { - unsigned long offset = page->offset; + curr = head->next; + while (curr != head) { + + page = list_entry(curr, struct page, list); + curr = curr->next; + + offset = page->offset; /* page wholly truncated - free it */ if (offset >= start) { @@ -190,7 +182,6 @@ repeat: */ goto repeat; } - p = &page->next; /* * there is only one partial page possible. 
*/ @@ -200,17 +191,14 @@ repeat: offset = start - offset; /* partial truncate, clear end of page */ if (offset < PAGE_CACHE_SIZE) { - unsigned long address; get_page(page); spin_unlock(&pagecache_lock); lock_page(page); partial = 1; - address = page_address(page); - memset((void *) (offset + address), 0, PAGE_CACHE_SIZE - offset); - flush_page_to_ram(address); - + memclear_highpage_flush(page, offset, + PAGE_CACHE_SIZE-offset); if (inode->i_op->flushpage) inode->i_op->flushpage(inode, page, offset); /* @@ -255,7 +243,7 @@ int shrink_mmap(int priority, int gfp_mask) /* don't account passes over not DMA pages */ if ((gfp_mask & __GFP_DMA) && !PageDMA(page)) goto dispose_continue; - if (!(gfp_mask & __GFP_BIGMEM) && PageBIGMEM(page)) + if (!(gfp_mask & __GFP_HIGHMEM) && PageHighMem(page)) goto dispose_continue; count--; @@ -291,7 +279,7 @@ int shrink_mmap(int priority, int gfp_mask) goto unlock_continue; /* page was locked, inode can't go away under us */ if (!page->inode) { - atomic_sub(PAGE_CACHE_SIZE, &buffermem); + atomic_dec(&buffermem_pages); goto made_buffer_progress; } spin_lock(&pagecache_lock); @@ -431,16 +419,18 @@ static int waitfor_one_page(struct page *page) static int do_buffer_fdatasync(struct inode *inode, unsigned long start, unsigned long end, int (*fn)(struct page *)) { - struct page *next; + struct list_head *head, *curr; + struct page *page; int retval = 0; + head = &inode->i_pages; start &= PAGE_MASK; spin_lock(&pagecache_lock); - next = inode->i_pages; - while (next) { - struct page *page = next; - next = page->next; + curr = head->next; + while (curr != head) { + page = list_entry(curr, struct page, list); + curr = curr->next; if (!page->buffers) continue; if (page->offset >= end) @@ -458,7 +448,7 @@ static int do_buffer_fdatasync(struct inode *inode, unsigned long start, unsigne UnlockPage(page); spin_lock(&pagecache_lock); - next = page->next; + curr = page->list.next; page_cache_release(page); } spin_unlock(&pagecache_lock); @@ -487,6 
+477,7 @@ static inline void __add_to_page_cache(struct page * page, struct inode * inode, unsigned long offset, struct page **hash) { + struct page *alias; unsigned long flags; flags = page->flags & ~((1 << PG_uptodate) | (1 << PG_error) | (1 << PG_referenced)); @@ -497,6 +488,9 @@ static inline void __add_to_page_cache(struct page * page, add_page_to_inode_queue(inode, page); __add_page_to_hash_queue(page, hash); lru_cache_add(page); + alias = __find_page_nolock(inode, offset, *hash); + if (alias != page) + BUG(); } void add_to_page_cache(struct page * page, struct inode * inode, unsigned long offset) @@ -532,10 +526,9 @@ int add_to_page_cache_unique(struct page * page, */ static inline void page_cache_read(struct file * file, unsigned long offset) { - unsigned long new_page; struct inode *inode = file->f_dentry->d_inode; - struct page ** hash = page_hash(inode, offset); - struct page * page; + struct page **hash = page_hash(inode, offset); + struct page *page; spin_lock(&pagecache_lock); page = __find_page_nolock(inode, offset, *hash); @@ -543,22 +536,20 @@ static inline void page_cache_read(struct file * file, unsigned long offset) if (page) return; - new_page = page_cache_alloc(); - if (!new_page) + page = page_cache_alloc(); + if (!page) return; - page = page_cache_entry(new_page); if (!add_to_page_cache_unique(page, inode, offset, hash)) { inode->i_op->readpage(file, page); page_cache_release(page); return; } - /* * We arrive here in the unlikely event that someone * raced with us and added our page to the cache first. 
*/ - page_cache_free(new_page); + page_cache_free(page); return; } @@ -962,13 +953,13 @@ void do_generic_file_read(struct file * filp, loff_t *ppos, read_descriptor_t * { struct dentry *dentry = filp->f_dentry; struct inode *inode = dentry->d_inode; - size_t pos, pgpos, page_cache; + size_t pos, pgpos; + struct page *cached_page; int reada_ok; int error; int max_readahead = get_max_readahead(inode); - page_cache = 0; - + cached_page = NULL; pos = *ppos; pgpos = pos & PAGE_CACHE_MASK; /* @@ -1051,7 +1042,7 @@ page_ok: * "pos" here (the actor routine has to update the user buffer * pointers and the remaining count). */ - nr = actor(desc, (const char *) (page_address(page) + offset), nr); + nr = actor(desc, page, offset, nr); pos += nr; page_cache_release(page); if (nr && desc->count) @@ -1105,10 +1096,10 @@ no_cached_page: * * We get here with the page cache lock held. */ - if (!page_cache) { + if (!cached_page) { spin_unlock(&pagecache_lock); - page_cache = page_cache_alloc(); - if (!page_cache) { + cached_page = page_cache_alloc(); + if (!cached_page) { desc->error = -ENOMEM; break; } @@ -1126,29 +1117,35 @@ no_cached_page: /* * Ok, add the new page to the hash-queues... 
*/ - page = page_cache_entry(page_cache); + page = cached_page; __add_to_page_cache(page, inode, pos & PAGE_CACHE_MASK, hash); spin_unlock(&pagecache_lock); + cached_page = NULL; - page_cache = 0; goto readpage; } *ppos = pos; filp->f_reada = 1; - if (page_cache) - page_cache_free(page_cache); + if (cached_page) + page_cache_free(cached_page); UPDATE_ATIME(inode); } -static int file_read_actor(read_descriptor_t * desc, const char *area, unsigned long size) +static int file_read_actor(read_descriptor_t * desc, struct page *page, unsigned long offset, unsigned long size) { - unsigned long left; - unsigned long count = desc->count; + unsigned long kaddr; + unsigned long left, count = desc->count; if (size > count) size = count; - left = __copy_to_user(desc->buf, area, size); + /* + * FIXME: We cannot yet sleep with kmaps held. + */ + kaddr = kmap(page, KM_READ); + left = __copy_to_user(desc->buf, (void *)(kaddr+offset), size); + kunmap(kaddr, KM_READ); + if (left) { size -= left; desc->error = -EFAULT; @@ -1187,8 +1184,9 @@ ssize_t generic_file_read(struct file * filp, char * buf, size_t count, loff_t * return retval; } -static int file_send_actor(read_descriptor_t * desc, const char *area, unsigned long size) +static int file_send_actor(read_descriptor_t * desc, struct page *page, unsigned long offset , unsigned long size) { + unsigned long kaddr; ssize_t written; unsigned long count = desc->count; struct file *file = (struct file *) desc->buf; @@ -1198,7 +1196,9 @@ static int file_send_actor(read_descriptor_t * desc, const char *area, unsigned size = count; old_fs = get_fs(); set_fs(KERNEL_DS); - written = file->f_op->write(file, area, size, &file->f_pos); + kaddr = kmap(page, KM_READ); + written = file->f_op->write(file, (char *)kaddr + offset, size, &file->f_pos); + kunmap(kaddr, KM_READ); set_fs(old_fs); if (written < 0) { desc->error = written; @@ -1298,14 +1298,13 @@ out: * XXX - at some point, this should return unique values to indicate to * the caller 
whether this is EIO, OOM, or SIGBUS. */ -static unsigned long filemap_nopage(struct vm_area_struct * area, +static struct page * filemap_nopage(struct vm_area_struct * area, unsigned long address, int no_share) { - struct file * file = area->vm_file; - struct dentry * dentry = file->f_dentry; - struct inode * inode = dentry->d_inode; - struct page * page, **hash; - unsigned long old_page; + struct file *file = area->vm_file; + struct dentry *dentry = file->f_dentry; + struct inode *inode = dentry->d_inode; + struct page *page, **hash, *old_page; unsigned long offset = address - area->vm_start + area->vm_offset; @@ -1317,7 +1316,7 @@ static unsigned long filemap_nopage(struct vm_area_struct * area, */ if ((offset >= inode->i_size) && (area->vm_flags & VM_SHARED) && (area->vm_mm == current->mm)) - return 0; + return NULL; /* * Do we have something in the page cache already? @@ -1340,12 +1339,14 @@ success: * Found the page and have a reference on it, need to check sharing * and possibly copy it over to another page.. */ - old_page = page_address(page); + old_page = page; if (no_share) { - unsigned long new_page = page_cache_alloc(); + struct page *new_page = page_cache_alloc(); if (new_page) { - copy_page(new_page, old_page); + if (PageHighMem(new_page) || PageHighMem(old_page)) + BUG(); + copy_highpage(new_page, old_page); flush_page_to_ram(new_page); } page_cache_release(page); @@ -1411,7 +1412,7 @@ page_not_uptodate: * mm layer so, possibly freeing the page cache page first. */ page_cache_release(page); - return 0; + return NULL; } /* @@ -1419,12 +1420,11 @@ page_not_uptodate: * if the disk is full. */ static inline int do_write_page(struct inode * inode, struct file * file, - const char * page_addr, unsigned long offset) + struct page * page, unsigned long offset) { int retval; unsigned long size; int (*writepage) (struct file *, struct page *); - struct page * page; size = offset + PAGE_SIZE; /* refuse to extend file size.. 
*/ @@ -1438,7 +1438,6 @@ static inline int do_write_page(struct inode * inode, struct file * file, size -= offset; retval = -EIO; writepage = inode->i_op->writepage; - page = mem_map + MAP_NR(page_addr); lock_page(page); retval = writepage(file, page); @@ -1449,7 +1448,7 @@ static inline int do_write_page(struct inode * inode, struct file * file, static int filemap_write_page(struct vm_area_struct * vma, unsigned long offset, - unsigned long page, + struct page * page, int wait) { int result; @@ -1466,7 +1465,7 @@ static int filemap_write_page(struct vm_area_struct * vma, * and file could be released ... increment the count to be safe. */ get_file(file); - result = do_write_page(inode, file, (const char *) page, offset); + result = do_write_page(inode, file, page, offset); fput(file); return result; } @@ -1480,7 +1479,7 @@ static int filemap_write_page(struct vm_area_struct * vma, extern void wakeup_bdflush(int); int filemap_swapout(struct vm_area_struct * vma, struct page * page) { - int retval = filemap_write_page(vma, page->offset, page_address(page), 0); + int retval = filemap_write_page(vma, page->offset, page, 0); wakeup_bdflush(0); return retval; } @@ -1489,7 +1488,6 @@ static inline int filemap_sync_pte(pte_t * ptep, struct vm_area_struct *vma, unsigned long address, unsigned int flags) { pte_t pte = *ptep; - unsigned long pageaddr; struct page *page; int error; @@ -1502,8 +1500,7 @@ static inline int filemap_sync_pte(pte_t * ptep, struct vm_area_struct *vma, flush_cache_page(vma, address); set_pte(ptep, pte_mkclean(pte)); flush_tlb_page(vma, address); - pageaddr = pte_page(pte); - page = page_cache_entry(pageaddr); + page = pte_page(pte); get_page(page); } else { if (pte_none(pte)) @@ -1512,17 +1509,19 @@ static inline int filemap_sync_pte(pte_t * ptep, struct vm_area_struct *vma, pte_clear(ptep); flush_tlb_page(vma, address); if (!pte_present(pte)) { - swap_free(pte_val(pte)); + swap_free(pte); return 0; } - pageaddr = pte_page(pte); + page = 
pte_page(pte); if (!pte_dirty(pte) || flags == MS_INVALIDATE) { - page_cache_free(pageaddr); + page_cache_free(page); return 0; } } - error = filemap_write_page(vma, address - vma->vm_start + vma->vm_offset, pageaddr, 1); - page_cache_free(pageaddr); + if (PageHighMem(page)) + BUG(); + error = filemap_write_page(vma, address - vma->vm_start + vma->vm_offset, page, 1); + page_cache_free(page); return error; } @@ -1537,7 +1536,7 @@ static inline int filemap_sync_pte_range(pmd_t * pmd, if (pmd_none(*pmd)) return 0; if (pmd_bad(*pmd)) { - printk("filemap_sync_pte_range: bad pmd (%08lx)\n", pmd_val(*pmd)); + pmd_ERROR(*pmd); pmd_clear(pmd); return 0; } @@ -1552,7 +1551,7 @@ static inline int filemap_sync_pte_range(pmd_t * pmd, error |= filemap_sync_pte(pte, vma, address + offset, flags); address += PAGE_SIZE; pte++; - } while (address < end); + } while (address && (address < end)); return error; } @@ -1567,7 +1566,7 @@ static inline int filemap_sync_pmd_range(pgd_t * pgd, if (pgd_none(*pgd)) return 0; if (pgd_bad(*pgd)) { - printk("filemap_sync_pmd_range: bad pgd (%08lx)\n", pgd_val(*pgd)); + pgd_ERROR(*pgd); pgd_clear(pgd); return 0; } @@ -1582,7 +1581,7 @@ static inline int filemap_sync_pmd_range(pgd_t * pgd, error |= filemap_sync_pte_range(pmd, address, end - address, vma, offset, flags); address = (address + PMD_SIZE) & PMD_MASK; pmd++; - } while (address < end); + } while (address && (address < end)); return error; } @@ -1595,11 +1594,13 @@ static int filemap_sync(struct vm_area_struct * vma, unsigned long address, dir = pgd_offset(vma->vm_mm, address); flush_cache_range(vma->vm_mm, end - size, end); - while (address < end) { + if (address >= end) + BUG(); + do { error |= filemap_sync_pmd_range(dir, address, end - address, vma, flags); address = (address + PGDIR_SIZE) & PGDIR_MASK; dir++; - } + } while (address && (address < end)); flush_tlb_range(vma->vm_mm, end - size, end); return error; } @@ -1775,12 +1776,13 @@ generic_file_write(struct file *file, const char 
*buf, struct inode *inode = dentry->d_inode; unsigned long pos = *ppos; unsigned long limit = current->rlim[RLIMIT_FSIZE].rlim_cur; - struct page *page, **hash; - unsigned long page_cache = 0; + struct page *page, **hash, *cached_page; unsigned long written; long status; int err; + cached_page = NULL; + down(&inode->i_sem); err = file->f_error; if (err) { @@ -1828,18 +1830,18 @@ generic_file_write(struct file *file, const char *buf, repeat_find: page = __find_lock_page(inode, pgpos, hash); if (!page) { - if (!page_cache) { - page_cache = page_cache_alloc(); - if (page_cache) + if (!cached_page) { + cached_page = page_cache_alloc(); + if (cached_page) goto repeat_find; status = -ENOMEM; break; } - page = page_cache_entry(page_cache); + page = cached_page; if (add_to_page_cache_unique(page,inode,pgpos,hash)) goto repeat_find; - page_cache = 0; + cached_page = NULL; } /* We have exclusive IO access to the page.. */ @@ -1870,8 +1872,8 @@ repeat_find: } *ppos = pos; - if (page_cache) - page_cache_free(page_cache); + if (cached_page) + page_cache_free(cached_page); err = written ? 
written : status; out: @@ -1897,11 +1899,11 @@ void put_cached_page(unsigned long addr) page_cache_release(page); } -void __init page_cache_init(unsigned long memory_size) +void __init page_cache_init(unsigned long mempages) { unsigned long htable_size, order; - htable_size = memory_size >> PAGE_SHIFT; + htable_size = mempages; htable_size *= sizeof(struct page *); for(order = 0; (PAGE_SIZE << order) < htable_size; order++) ; @@ -1921,5 +1923,5 @@ void __init page_cache_init(unsigned long memory_size) (1 << page_hash_bits), order, (PAGE_SIZE << order)); if (!page_hash_table) panic("Failed to allocate page hash table\n"); - memset(page_hash_table, 0, PAGE_HASH_SIZE * sizeof(struct page *)); + memset((void *)page_hash_table, 0, PAGE_HASH_SIZE * sizeof(struct page *)); } diff --git a/mm/highmem.c b/mm/highmem.c new file mode 100644 index 000000000000..7665393cf5e9 --- /dev/null +++ b/mm/highmem.c @@ -0,0 +1,81 @@ +/* + * High memory handling common code and variables. + * + * (C) 1999 Andrea Arcangeli, SuSE GmbH, andrea@suse.de + * Gerhard Wichert, Siemens AG, Gerhard.Wichert@pdb.siemens.de + * + * Redesigned the x86 32-bit VM architecture to deal with + * 64-bit physical space. With current x86 CPUs this + * means up to 64 Gigabytes physical RAM. + * + * Copyright (C) 1999 Ingo Molnar + */ + +#include +#include +#include + +unsigned long highmem_mapnr; +unsigned long nr_free_highpages = 0; + +struct page * prepare_highmem_swapout(struct page * page) +{ + unsigned long regular_page; + unsigned long vaddr; + /* + * If this is a highmem page, it can't be swapped out directly, + * otherwise the b_data buffer addresses will break + * the lowlevel device drivers.
+ */ + if (!PageHighMem(page)) + return page; + + regular_page = __get_free_page(GFP_ATOMIC); + if (!regular_page) + return NULL; + + vaddr = kmap(page, KM_READ); + copy_page((void *)regular_page, (void *)vaddr); + kunmap(vaddr, KM_READ); + + /* + * ok, we can just forget about our highmem page since + * we stored its data into the new regular_page. + */ + __free_page(page); + + return mem_map + MAP_NR(regular_page); +} + +struct page * replace_with_highmem(struct page * page) +{ + struct page *highpage; + unsigned long vaddr; + + if (PageHighMem(page) || !nr_free_highpages) + return page; + + highpage = get_free_highpage(GFP_ATOMIC|__GFP_HIGHMEM); + if (!highpage) + return page; + if (!PageHighMem(highpage)) { + __free_page(highpage); + return page; + } + + vaddr = kmap(highpage, KM_WRITE); + copy_page((void *)vaddr, (void *)page_address(page)); + kunmap(vaddr, KM_WRITE); + + /* Preserve the caching of the swap_entry. */ + highpage->offset = page->offset; + highpage->inode = page->inode; + + /* + * We can just forget the old page since + * we stored its data into the new highmem-page. + */ + __free_page(page); + + return highpage; +} diff --git a/mm/memory.c b/mm/memory.c index 5498dbcf0361..889793928286 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -43,7 +43,7 @@ #include #include #include -#include +#include #include #include @@ -51,19 +51,20 @@ unsigned long max_mapnr = 0; unsigned long num_physpages = 0; void * high_memory = NULL; +struct page *highmem_start_page; /* * We special-case the C-O-W ZERO_PAGE, because it's such * a common occurrence (no need to read the page to know * that it's zero - better for the cache and memory subsystem). 
*/ -static inline void copy_cow_page(unsigned long from, unsigned long to) +static inline void copy_cow_page(struct page * from, struct page * to) { if (from == ZERO_PAGE(to)) { - clear_bigpage(to); + clear_highpage(to); return; } - copy_bigpage(to, from); + copy_highpage(to, from); } mem_map_t * mem_map = NULL; @@ -89,7 +90,7 @@ static inline void free_one_pmd(pmd_t * dir) if (pmd_none(*dir)) return; if (pmd_bad(*dir)) { - printk("free_one_pmd: bad directory entry %08lx\n", pmd_val(*dir)); + pmd_ERROR(*dir); pmd_clear(dir); return; } @@ -106,7 +107,7 @@ static inline void free_one_pgd(pgd_t * dir) if (pgd_none(*dir)) return; if (pgd_bad(*dir)) { - printk("free_one_pgd: bad directory entry %08lx\n", pgd_val(*dir)); + pgd_ERROR(*dir); pgd_clear(dir); return; } @@ -179,11 +180,10 @@ int copy_page_range(struct mm_struct *dst, struct mm_struct *src, if (pgd_none(*src_pgd)) goto skip_copy_pmd_range; if (pgd_bad(*src_pgd)) { - printk("copy_pmd_range: bad pgd (%08lx)\n", - pgd_val(*src_pgd)); + pgd_ERROR(*src_pgd); pgd_clear(src_pgd); skip_copy_pmd_range: address = (address + PGDIR_SIZE) & PGDIR_MASK; - if (address >= end) + if (!address || (address >= end)) goto out; continue; } @@ -203,7 +203,7 @@ skip_copy_pmd_range: address = (address + PGDIR_SIZE) & PGDIR_MASK; if (pmd_none(*src_pmd)) goto skip_copy_pte_range; if (pmd_bad(*src_pmd)) { - printk("copy_pte_range: bad pmd (%08lx)\n", pmd_val(*src_pmd)); + pmd_ERROR(*src_pmd); pmd_clear(src_pmd); skip_copy_pte_range: address = (address + PMD_SIZE) & PMD_MASK; if (address >= end) @@ -227,11 +227,11 @@ skip_copy_pte_range: address = (address + PMD_SIZE) & PMD_MASK; if (pte_none(pte)) goto cont_copy_pte_range; if (!pte_present(pte)) { - swap_duplicate(pte_val(pte)); + swap_duplicate(pte); set_pte(dst_pte, pte); goto cont_copy_pte_range; } - page_nr = MAP_NR(pte_page(pte)); + page_nr = pte_pagenr(pte); if (page_nr >= max_mapnr || PageReserved(mem_map+page_nr)) { set_pte(dst_pte, pte); @@ -272,17 +272,17 @@ nomem: static 
inline int free_pte(pte_t page) { if (pte_present(page)) { - unsigned long addr = pte_page(page); - if (MAP_NR(addr) >= max_mapnr || PageReserved(mem_map+MAP_NR(addr))) + unsigned long nr = pte_pagenr(page); + if (nr >= max_mapnr || PageReserved(mem_map+nr)) return 0; /* * free_page() used to be able to clear swap cache * entries. We may now have to do it manually. */ - free_page_and_swap_cache(addr); + free_page_and_swap_cache(mem_map+nr); return 1; } - swap_free(pte_val(page)); + swap_free(page); return 0; } @@ -302,7 +302,7 @@ static inline int zap_pte_range(struct mm_struct *mm, pmd_t * pmd, unsigned long if (pmd_none(*pmd)) return 0; if (pmd_bad(*pmd)) { - printk("zap_pte_range: bad pmd (%08lx)\n", pmd_val(*pmd)); + pmd_ERROR(*pmd); pmd_clear(pmd); return 0; } @@ -336,7 +336,7 @@ static inline int zap_pmd_range(struct mm_struct *mm, pgd_t * dir, unsigned long if (pgd_none(*dir)) return 0; if (pgd_bad(*dir)) { - printk("zap_pmd_range: bad pgd (%08lx)\n", pgd_val(*dir)); + pgd_ERROR(*dir); pgd_clear(dir); return 0; } @@ -372,12 +372,14 @@ void zap_page_range(struct mm_struct *mm, unsigned long address, unsigned long s * even if kswapd happened to be looking at this * process we _want_ it to get stuck. */ + if (address >= end) + BUG(); spin_lock(&mm->page_table_lock); - while (address < end) { + do { freed += zap_pmd_range(mm, dir, address, end - address); address = (address + PGDIR_SIZE) & PGDIR_MASK; dir++; - } + } while (address && (address < end)); spin_unlock(&mm->page_table_lock); /* * Update rss for the mm_struct (not necessarily current->mm) @@ -393,7 +395,7 @@ void zap_page_range(struct mm_struct *mm, unsigned long address, unsigned long s /* * Do a quick page-table lookup for a single page. 
*/ -static unsigned long follow_page(unsigned long address) +static struct page * follow_page(unsigned long address) { pgd_t *pgd; pmd_t *pmd; @@ -402,31 +404,27 @@ static unsigned long follow_page(unsigned long address) pmd = pmd_offset(pgd, address); if (pmd) { pte_t * pte = pte_offset(pmd, address); - if (pte && pte_present(*pte)) { + if (pte && pte_present(*pte)) return pte_page(*pte); - } } printk(KERN_ERR "Missing page in follow_page\n"); - return 0; + return NULL; } /* * Given a physical address, is there a useful struct page pointing to it? */ -static struct page * get_page_map(unsigned long page) +struct page * get_page_map(struct page *page) { - struct page *map; - if (MAP_NR(page) >= max_mapnr) return 0; if (page == ZERO_PAGE(page)) return 0; - map = mem_map + MAP_NR(page); - if (PageReserved(map)) + if (PageReserved(page)) return 0; - return map; + return page; } /* @@ -441,7 +439,6 @@ int map_user_kiobuf(int rw, struct kiobuf *iobuf, unsigned long va, size_t len) int err; struct mm_struct * mm; struct vm_area_struct * vma = 0; - unsigned long page; struct page * map; int doublepage = 0; int repeat = 0; @@ -482,13 +479,12 @@ int map_user_kiobuf(int rw, struct kiobuf *iobuf, unsigned long va, size_t len) if (handle_mm_fault(current, vma, ptr, (rw==READ)) <= 0) goto out_unlock; spin_lock(&mm->page_table_lock); - page = follow_page(ptr); - if (!page) { + map = follow_page(ptr); + if (!map) { dprintk (KERN_ERR "Missing page in map_user_kiobuf\n"); - map = NULL; goto retry; } - map = get_page_map(page); + map = get_page_map(map); if (map) { if (TryLockPage(map)) { goto retry; @@ -496,8 +492,6 @@ int map_user_kiobuf(int rw, struct kiobuf *iobuf, unsigned long va, size_t len) atomic_inc(&map->count); } spin_unlock(&mm->page_table_lock); - dprintk ("Installing page %p %p: %d\n", (void *)page, map, i); - iobuf->pagelist[i] = page; iobuf->maplist[i] = map; iobuf->nr_pages = ++i; @@ -585,14 +579,13 @@ static inline void zeromap_pte_range(pte_t * pte, unsigned long 
address, if (end > PMD_SIZE) end = PMD_SIZE; do { - pte_t zero_pte = pte_wrprotect(mk_pte(ZERO_PAGE(address), - prot)); + pte_t zero_pte = pte_wrprotect(mk_pte(ZERO_PAGE(address), prot)); pte_t oldpage = *pte; set_pte(pte, zero_pte); forget_pte(oldpage); address += PAGE_SIZE; pte++; - } while (address < end); + } while (address && (address < end)); } static inline int zeromap_pmd_range(pmd_t * pmd, unsigned long address, @@ -611,7 +604,7 @@ static inline int zeromap_pmd_range(pmd_t * pmd, unsigned long address, zeromap_pte_range(pte, address, end - address, prot); address = (address + PMD_SIZE) & PMD_MASK; pmd++; - } while (address < end); + } while (address && (address < end)); return 0; } @@ -624,7 +617,9 @@ int zeromap_page_range(unsigned long address, unsigned long size, pgprot_t prot) dir = pgd_offset(current->mm, address); flush_cache_range(current->mm, beg, end); - while (address < end) { + if (address >= end) + BUG(); + do { pmd_t *pmd = pmd_alloc(dir, address); error = -ENOMEM; if (!pmd) @@ -634,7 +629,7 @@ int zeromap_page_range(unsigned long address, unsigned long size, pgprot_t prot) break; address = (address + PGDIR_SIZE) & PGDIR_MASK; dir++; - } + } while (address && (address < end)); flush_tlb_range(current->mm, beg, end); return error; } @@ -665,7 +660,7 @@ static inline void remap_pte_range(pte_t * pte, unsigned long address, unsigned address += PAGE_SIZE; phys_addr += PAGE_SIZE; pte++; - } while (address < end); + } while (address && (address < end)); } static inline int remap_pmd_range(pmd_t * pmd, unsigned long address, unsigned long size, @@ -685,7 +680,7 @@ static inline int remap_pmd_range(pmd_t * pmd, unsigned long address, unsigned l remap_pte_range(pte, address, end - address, address + phys_addr, prot); address = (address + PMD_SIZE) & PMD_MASK; pmd++; - } while (address < end); + } while (address && (address < end)); return 0; } @@ -699,7 +694,9 @@ int remap_page_range(unsigned long from, unsigned long phys_addr, unsigned long phys_addr 
-= from; dir = pgd_offset(current->mm, from); flush_cache_range(current->mm, beg, end); - while (from < end) { + if (from >= end) + BUG(); + do { pmd_t *pmd = pmd_alloc(dir, from); error = -ENOMEM; if (!pmd) @@ -709,7 +706,7 @@ int remap_page_range(unsigned long from, unsigned long phys_addr, unsigned long break; from = (from + PGDIR_SIZE) & PGDIR_MASK; dir++; - } + } while (from && (from < end)); flush_tlb_range(current->mm, beg, end); return error; } @@ -718,37 +715,35 @@ int remap_page_range(unsigned long from, unsigned long phys_addr, unsigned long * This routine is used to map in a page into an address space: needed by * execve() for the initial stack and environment pages. */ -unsigned long put_dirty_page(struct task_struct * tsk, unsigned long page, unsigned long address) +struct page * put_dirty_page(struct task_struct * tsk, struct page *page, + unsigned long address) { pgd_t * pgd; pmd_t * pmd; pte_t * pte; - if (MAP_NR(page) >= max_mapnr) - printk("put_dirty_page: trying to put page %08lx at %08lx\n",page,address); - if (page_count(mem_map + MAP_NR(page)) != 1) - printk("mem_map disagrees with %08lx at %08lx\n",page,address); - pgd = pgd_offset(tsk->mm,address); + if (page_count(page) != 1) + printk("mem_map disagrees with %p at %08lx\n", page, address); + pgd = pgd_offset(tsk->mm, address); pmd = pmd_alloc(pgd, address); if (!pmd) { - free_page(page); + __free_page(page); oom(tsk); return 0; } pte = pte_alloc(pmd, address); if (!pte) { - free_page(page); + __free_page(page); oom(tsk); return 0; } if (!pte_none(*pte)) { - printk("put_dirty_page: pte %08lx already exists\n", - pte_val(*pte)); - free_page(page); + pte_ERROR(*pte); + __free_page(page); return 0; } - flush_page_to_ram(page); - set_pte(pte, pte_mkwrite(pte_mkdirty(mk_pte(page, PAGE_COPY)))); + flush_page_to_ram(pte_page(page)); + set_pte(pte, pte_mkwrite(page_pte_prot(page, PAGE_COPY))); /* no need for flush_tlb */ return page; } @@ -776,14 +771,14 @@ unsigned long put_dirty_page(struct 
task_struct * tsk, unsigned long page, unsig static int do_wp_page(struct task_struct * tsk, struct vm_area_struct * vma, unsigned long address, pte_t *page_table, pte_t pte) { - unsigned long old_page, new_page; - struct page * page; + unsigned long map_nr; + struct page *old_page, *new_page; - old_page = pte_page(pte); - if (MAP_NR(old_page) >= max_mapnr) + map_nr = pte_pagenr(pte); + if (map_nr >= max_mapnr) goto bad_wp_page; tsk->min_flt++; - page = mem_map + MAP_NR(old_page); + old_page = mem_map + map_nr; /* * We can avoid the copy if: @@ -793,13 +788,13 @@ static int do_wp_page(struct task_struct * tsk, struct vm_area_struct * vma, * in which case we can remove the page * from the swap cache. */ - switch (page_count(page)) { + switch (page_count(old_page)) { case 2: - if (!PageSwapCache(page)) + if (!PageSwapCache(old_page)) break; - if (swap_count(page->offset) != 1) + if (swap_count(old_page) != 1) break; - delete_from_swap_cache(page); + delete_from_swap_cache(old_page); /* FallThrough */ case 1: flush_cache_page(vma, address); @@ -813,7 +808,7 @@ static int do_wp_page(struct task_struct * tsk, struct vm_area_struct * vma, * Ok, we need to copy. Oh, well.. 
*/ spin_unlock(&tsk->mm->page_table_lock); - new_page = __get_free_page(GFP_BIGUSER); + new_page = get_free_highpage(GFP_HIGHUSER); if (!new_page) return -1; spin_lock(&tsk->mm->page_table_lock); @@ -822,9 +817,9 @@ static int do_wp_page(struct task_struct * tsk, struct vm_area_struct * vma, * Re-check the pte - we dropped the lock */ if (pte_val(*page_table) == pte_val(pte)) { - if (PageReserved(page)) + if (PageReserved(old_page)) ++vma->vm_mm->rss; - copy_cow_page(old_page,new_page); + copy_cow_page(old_page, new_page); flush_page_to_ram(new_page); flush_cache_page(vma, address); set_pte(page_table, pte_mkwrite(pte_mkdirty(mk_pte(new_page, vma->vm_page_prot)))); @@ -834,12 +829,12 @@ static int do_wp_page(struct task_struct * tsk, struct vm_area_struct * vma, new_page = old_page; } spin_unlock(&tsk->mm->page_table_lock); - free_page(new_page); + __free_page(new_page); return 1; bad_wp_page: spin_unlock(&tsk->mm->page_table_lock); - printk("do_wp_page: bogus page at address %08lx (%08lx)\n",address,old_page); + printk("do_wp_page: bogus page at address %08lx (nr %ld)\n",address,map_nr); return -1; } @@ -848,6 +843,8 @@ bad_wp_page: */ static void partial_clear(struct vm_area_struct *vma, unsigned long address) { + unsigned int offset; + struct page *page; pgd_t *page_dir; pmd_t *page_middle; pte_t *page_table, pte; @@ -856,7 +853,7 @@ static void partial_clear(struct vm_area_struct *vma, unsigned long address) if (pgd_none(*page_dir)) return; if (pgd_bad(*page_dir)) { - printk("bad page table directory entry %p:[%lx]\n", page_dir, pgd_val(*page_dir)); + pgd_ERROR(*page_dir); pgd_clear(page_dir); return; } @@ -864,7 +861,7 @@ static void partial_clear(struct vm_area_struct *vma, unsigned long address) if (pmd_none(*page_middle)) return; if (pmd_bad(*page_middle)) { - printk("bad page table directory entry %p:[%lx]\n", page_dir, pgd_val(*page_dir)); + pmd_ERROR(*page_middle); pmd_clear(page_middle); return; } @@ -873,12 +870,11 @@ static void partial_clear(struct 
vm_area_struct *vma, unsigned long address) if (!pte_present(pte)) return; flush_cache_page(vma, address); - address &= ~PAGE_MASK; - address += pte_page(pte); - if (MAP_NR(address) >= max_mapnr) + page = pte_page(pte); + if (page-mem_map >= max_mapnr) return; - memset((void *) address, 0, PAGE_SIZE - (address & ~PAGE_MASK)); - flush_page_to_ram(pte_page(pte)); + offset = address & ~PAGE_MASK; + memclear_highpage_flush(page, offset, PAGE_SIZE - offset); } /* @@ -939,7 +935,7 @@ out_unlock: * because it doesn't cost us any seek time. We also make sure to queue * the 'original' request together with the readahead ones... */ -void swapin_readahead(unsigned long entry) +void swapin_readahead(pte_t entry) { int i; struct page *new_page; @@ -973,7 +969,7 @@ void swapin_readahead(unsigned long entry) static int do_swap_page(struct task_struct * tsk, struct vm_area_struct * vma, unsigned long address, - pte_t * page_table, unsigned long entry, int write_access) + pte_t * page_table, pte_t entry, int write_access) { struct page *page = lookup_swap_cache(entry); pte_t pte; @@ -986,7 +982,7 @@ static int do_swap_page(struct task_struct * tsk, if (!page) return -1; - flush_page_to_ram(page_address(page)); + flush_page_to_ram(page); } vma->vm_mm->rss++; @@ -995,13 +991,13 @@ static int do_swap_page(struct task_struct * tsk, swap_free(entry); unlock_kernel(); - pte = mk_pte(page_address(page), vma->vm_page_prot); + pte = mk_pte(page, vma->vm_page_prot); set_bit(PG_swap_entry, &page->flags); if (write_access && !is_page_shared(page)) { delete_from_swap_cache(page); - page = replace_with_bigmem(page); - pte = mk_pte(page_address(page), vma->vm_page_prot); + page = replace_with_highmem(page); + pte = mk_pte(page, vma->vm_page_prot); pte = pte_mkwrite(pte_mkdirty(pte)); } set_pte(page_table, pte); @@ -1015,12 +1011,16 @@ static int do_swap_page(struct task_struct * tsk, */ static int do_anonymous_page(struct task_struct * tsk, struct vm_area_struct * vma, pte_t *page_table, int 
write_access, unsigned long addr) { + int high = 0; + struct page *page = NULL; pte_t entry = pte_wrprotect(mk_pte(ZERO_PAGE(addr), vma->vm_page_prot)); if (write_access) { - unsigned long page = __get_free_page(GFP_BIGUSER); + page = get_free_highpage(GFP_HIGHUSER); if (!page) return -1; - clear_bigpage(page); + if (PageHighMem(page)) + high = 1; + clear_highpage(page); entry = pte_mkwrite(pte_mkdirty(mk_pte(page, vma->vm_page_prot))); vma->vm_mm->rss++; tsk->min_flt++; @@ -1047,7 +1047,7 @@ static int do_anonymous_page(struct task_struct * tsk, struct vm_area_struct * v static int do_no_page(struct task_struct * tsk, struct vm_area_struct * vma, unsigned long address, int write_access, pte_t *page_table) { - unsigned long page; + struct page * new_page; pte_t entry; if (!vma->vm_ops || !vma->vm_ops->nopage) @@ -1058,12 +1058,11 @@ static int do_no_page(struct task_struct * tsk, struct vm_area_struct * vma, * to copy, not share the page even if sharing is possible. It's * essentially an early COW detection. */ - page = vma->vm_ops->nopage(vma, address & PAGE_MASK, (vma->vm_flags & VM_SHARED)?0:write_access); - if (!page) + new_page = vma->vm_ops->nopage(vma, address & PAGE_MASK, (vma->vm_flags & VM_SHARED)?0:write_access); + if (!new_page) return 0; /* SIGBUS - but we _really_ should know whether it is OOM or SIGBUS */ - if (page == -1) + if (new_page == (struct page *)-1) return -1; /* OOM */ - ++tsk->maj_flt; ++vma->vm_mm->rss; /* @@ -1076,11 +1075,11 @@ static int do_no_page(struct task_struct * tsk, struct vm_area_struct * vma, * so we can make it writable and dirty to avoid having to * handle that later. 
*/ - flush_page_to_ram(page); - entry = mk_pte(page, vma->vm_page_prot); + flush_page_to_ram(new_page); + entry = mk_pte(new_page, vma->vm_page_prot); if (write_access) { entry = pte_mkwrite(pte_mkdirty(entry)); - } else if (page_count(mem_map+MAP_NR(page)) > 1 && + } else if (page_count(new_page) > 1 && !(vma->vm_flags & VM_SHARED)) entry = pte_wrprotect(entry); set_pte(page_table, entry); @@ -1117,7 +1116,7 @@ static inline int handle_pte_fault(struct task_struct *tsk, if (!pte_present(entry)) { if (pte_none(entry)) return do_no_page(tsk, vma, address, write_access, pte); - return do_swap_page(tsk, vma, address, pte, pte_val(entry), write_access); + return do_swap_page(tsk, vma, address, pte, entry, write_access); } /* @@ -1148,17 +1147,19 @@ static inline int handle_pte_fault(struct task_struct *tsk, int handle_mm_fault(struct task_struct *tsk, struct vm_area_struct * vma, unsigned long address, int write_access) { + int ret = -1; pgd_t *pgd; pmd_t *pmd; pgd = pgd_offset(vma->vm_mm, address); pmd = pmd_alloc(pgd, address); + if (pmd) { pte_t * pte = pte_alloc(pmd, address); if (pte) - return handle_pte_fault(tsk, vma, address, write_access, pte); + ret = handle_pte_fault(tsk, vma, address, write_access, pte); } - return -1; + return ret; } /* @@ -1172,10 +1173,12 @@ int make_pages_present(unsigned long addr, unsigned long end) vma = find_vma(tsk->mm, addr); write = (vma->vm_flags & VM_WRITE) != 0; - while (addr < end) { + if (addr >= end) + BUG(); + do { if (handle_mm_fault(tsk, vma, addr, write) < 0) return -1; addr += PAGE_SIZE; - } + } while (addr < end); return 0; } diff --git a/mm/mmap.c b/mm/mmap.c index 14413b3082e0..a42e9a4cc7ae 100644 --- a/mm/mmap.c +++ b/mm/mmap.c @@ -62,7 +62,7 @@ int vm_enough_memory(long pages) if (sysctl_overcommit_memory) return 1; - free = atomic_read(&buffermem) >> PAGE_SHIFT; + free = atomic_read(&buffermem_pages); free += atomic_read(&page_cache_size); free += nr_free_pages; free += nr_swap_pages; diff --git a/mm/mprotect.c 
b/mm/mprotect.c index de2fd6917fa1..56454fc07099 100644 --- a/mm/mprotect.c +++ b/mm/mprotect.c @@ -20,7 +20,7 @@ static inline void change_pte_range(pmd_t * pmd, unsigned long address, if (pmd_none(*pmd)) return; if (pmd_bad(*pmd)) { - printk("change_pte_range: bad pmd (%08lx)\n", pmd_val(*pmd)); + pmd_ERROR(*pmd); pmd_clear(pmd); return; } @@ -35,7 +35,7 @@ static inline void change_pte_range(pmd_t * pmd, unsigned long address, set_pte(pte, pte_modify(entry, newprot)); address += PAGE_SIZE; pte++; - } while (address < end); + } while (address && (address < end)); } static inline void change_pmd_range(pgd_t * pgd, unsigned long address, @@ -47,7 +47,7 @@ static inline void change_pmd_range(pgd_t * pgd, unsigned long address, if (pgd_none(*pgd)) return; if (pgd_bad(*pgd)) { - printk("change_pmd_range: bad pgd (%08lx)\n", pgd_val(*pgd)); + pgd_ERROR(*pgd); pgd_clear(pgd); return; } @@ -60,7 +60,7 @@ static inline void change_pmd_range(pgd_t * pgd, unsigned long address, change_pte_range(pmd, address, end - address, newprot); address = (address + PMD_SIZE) & PMD_MASK; pmd++; - } while (address < end); + } while (address && (address < end)); } static void change_protection(unsigned long start, unsigned long end, pgprot_t newprot) @@ -70,11 +70,13 @@ static void change_protection(unsigned long start, unsigned long end, pgprot_t n dir = pgd_offset(current->mm, start); flush_cache_range(current->mm, beg, end); - while (start < end) { + if (start >= end) + BUG(); + do { change_pmd_range(dir, start, end - start, newprot); start = (start + PGDIR_SIZE) & PGDIR_MASK; dir++; - } + } while (start && (start < end)); flush_tlb_range(current->mm, beg, end); return; } diff --git a/mm/mremap.c b/mm/mremap.c index 101e513108d9..b73996dc2eb3 100644 --- a/mm/mremap.c +++ b/mm/mremap.c @@ -25,7 +25,7 @@ static inline pte_t *get_one_pte(struct mm_struct *mm, unsigned long addr) if (pgd_none(*pgd)) goto end; if (pgd_bad(*pgd)) { - printk("move_one_page: bad source pgd (%08lx)\n", 
pgd_val(*pgd)); + pgd_ERROR(*pgd); pgd_clear(pgd); goto end; } @@ -34,7 +34,7 @@ static inline pte_t *get_one_pte(struct mm_struct *mm, unsigned long addr) if (pmd_none(*pmd)) goto end; if (pmd_bad(*pmd)) { - printk("move_one_page: bad source pmd (%08lx)\n", pmd_val(*pmd)); + pmd_ERROR(*pmd); pmd_clear(pmd); goto end; } diff --git a/mm/page_alloc.c b/mm/page_alloc.c index b62783c72332..772a30057aba 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -14,7 +14,8 @@ #include #include #include -#include /* export bigmem vars */ +#include +#include #include #include /* for copy_to/from_user */ @@ -40,46 +41,18 @@ LIST_HEAD(lru_cache); #define NR_MEM_LISTS 10 #endif -/* The start of this MUST match the start of "struct page" */ struct free_area_struct { - struct page *next; - struct page *prev; + struct list_head free_list; unsigned int * map; }; -#define memory_head(x) ((struct page *)(x)) - -#ifdef CONFIG_BIGMEM -#define BIGMEM_LISTS_OFFSET NR_MEM_LISTS +#ifdef CONFIG_HIGHMEM +#define HIGHMEM_LISTS_OFFSET NR_MEM_LISTS static struct free_area_struct free_area[NR_MEM_LISTS*2]; #else static struct free_area_struct free_area[NR_MEM_LISTS]; #endif -static inline void init_mem_queue(struct free_area_struct * head) -{ - head->next = memory_head(head); - head->prev = memory_head(head); -} - -static inline void add_mem_queue(struct free_area_struct * head, struct page * entry) -{ - struct page * next = head->next; - - entry->prev = memory_head(head); - entry->next = next; - next->prev = entry; - head->next = entry; -} - -static inline void remove_mem_queue(struct page * entry) -{ - struct page * next = entry->next; - struct page * prev = entry->prev; - next->prev = prev; - prev->next = next; -} - /* * Free_page() adds the page to the free lists. This is optimized for * fast normal cases (no error jumps taken normally). 
@@ -99,41 +72,67 @@ static inline void remove_mem_queue(struct page * entry) */ spinlock_t page_alloc_lock = SPIN_LOCK_UNLOCKED; +#define memlist_init(x) INIT_LIST_HEAD(x) +#define memlist_add_head list_add +#define memlist_add_tail list_add_tail +#define memlist_del list_del +#define memlist_entry list_entry +#define memlist_next(x) ((x)->next) +#define memlist_prev(x) ((x)->prev) + static inline void free_pages_ok(unsigned long map_nr, unsigned long order) { struct free_area_struct *area = free_area + order; unsigned long index = map_nr >> (1 + order); unsigned long mask = (~0UL) << order; unsigned long flags; + struct page *page, *buddy; spin_lock_irqsave(&page_alloc_lock, flags); #define list(x) (mem_map+(x)) -#ifdef CONFIG_BIGMEM - if (map_nr >= bigmem_mapnr) { - area += BIGMEM_LISTS_OFFSET; - nr_free_bigpages -= mask; +#ifdef CONFIG_HIGHMEM + if (map_nr >= highmem_mapnr) { + area += HIGHMEM_LISTS_OFFSET; + nr_free_highpages -= mask; } #endif map_nr &= mask; nr_free_pages -= mask; + while (mask + (1 << (NR_MEM_LISTS-1))) { if (!test_and_change_bit(index, area->map)) + /* + * the buddy page is still allocated. + */ break; - remove_mem_queue(list(map_nr ^ -mask)); + /* + * Move the buddy up one level. + */ + buddy = list(map_nr ^ -mask); + page = list(map_nr); + + memlist_del(&buddy->list); mask <<= 1; area++; index >>= 1; map_nr &= mask; } - add_mem_queue(area, list(map_nr)); - + memlist_add_head(&(list(map_nr))->list, &area->free_list); #undef list spin_unlock_irqrestore(&page_alloc_lock, flags); } +/* + * Some ugly macros to speed up __get_free_pages().. 
+ */ +#define MARK_USED(index, order, area) \ + change_bit((index) >> (1+(order)), (area)->map) +#define CAN_DMA(x) (PageDMA(x)) +#define ADDRESS(x) (PAGE_OFFSET + ((x) << PAGE_SHIFT)) + int __free_page(struct page *page) { if (!PageReserved(page) && put_page_testzero(page)) { @@ -142,7 +141,7 @@ int __free_page(struct page *page) if (PageLocked(page)) PAGE_BUG(page); - free_pages_ok(page - mem_map, 0); + free_pages_ok(page-mem_map, 0); return 1; } return 0; @@ -166,148 +165,146 @@ int free_pages(unsigned long addr, unsigned long order) return 0; } -/* - * Some ugly macros to speed up __get_free_pages().. - */ -#define MARK_USED(index, order, area) \ - change_bit((index) >> (1+(order)), (area)->map) -#define CAN_DMA(x) (PageDMA(x)) -#define ADDRESS(x) (PAGE_OFFSET + ((x) << PAGE_SHIFT)) +static inline unsigned long EXPAND (struct page *map, unsigned long index, + int low, int high, struct free_area_struct * area) +{ + unsigned long size = 1 << high; + + while (high > low) { + area--; + high--; + size >>= 1; + memlist_add_head(&(map)->list, &(area)->free_list); + MARK_USED(index, high, area); + index += size; + map += size; + } + set_page_count(map, 1); + return index; +} + +static inline struct page * rmqueue (int order, int gfp_mask, int offset) +{ + struct free_area_struct * area = free_area+order+offset; + unsigned long curr_order = order, map_nr; + struct page *page; + struct list_head *head, *curr; + + do { + head = &area->free_list; + curr = memlist_next(head); + + while (curr != head) { + page = memlist_entry(curr, struct page, list); + if (!(gfp_mask & __GFP_DMA) || CAN_DMA(page)) { + memlist_del(curr); + map_nr = page - mem_map; + MARK_USED(map_nr, curr_order, area); + nr_free_pages -= 1 << order; + map_nr = EXPAND(page, map_nr, order, curr_order, area); + page = mem_map + map_nr; + return page; + } + curr = memlist_next(curr); + } + curr_order++; + area++; + } while (curr_order < NR_MEM_LISTS); -#ifdef CONFIG_BIGMEM -#define RMQUEUEBIG(order, gfp_mask) \ 
-if (gfp_mask & __GFP_BIGMEM) { \ - struct free_area_struct * area = free_area+order+BIGMEM_LISTS_OFFSET; \ - unsigned long new_order = order; \ - do { struct page *prev = memory_head(area), *ret = prev->next; \ - if (memory_head(area) != ret) { \ - unsigned long map_nr; \ - (prev->next = ret->next)->prev = prev; \ - map_nr = ret - mem_map; \ - MARK_USED(map_nr, new_order, area); \ - nr_free_pages -= 1 << order; \ - nr_free_bigpages -= 1 << order; \ - EXPAND(ret, map_nr, order, new_order, area); \ - spin_unlock_irqrestore(&page_alloc_lock, flags); \ - return ADDRESS(map_nr); \ - } \ - new_order++; area++; \ - } while (new_order < NR_MEM_LISTS); \ + return NULL; } + +static inline int balance_lowmemory (int gfp_mask) +{ + int freed; + static int low_on_memory = 0; + +#ifndef CONFIG_HIGHMEM + if (nr_free_pages > freepages.min) { + if (!low_on_memory) + return 1; + if (nr_free_pages >= freepages.high) { + low_on_memory = 0; + return 1; + } + } + + low_on_memory = 1; +#else + static int low_on_highmemory = 0; + + if (gfp_mask & __GFP_HIGHMEM) + { + if (nr_free_pages > freepages.min) { + if (!low_on_highmemory) { + return 1; + } + if (nr_free_pages >= freepages.high) { + low_on_highmemory = 0; + return 1; + } + } + low_on_highmemory = 1; + } else { + if (nr_free_pages+nr_free_highpages > freepages.min) { + if (!low_on_memory) { + return 1; + } + if (nr_free_pages+nr_free_highpages >= freepages.high) { + low_on_memory = 0; + return 1; + } + } + low_on_memory = 1; + } #endif + current->flags |= PF_MEMALLOC; + freed = try_to_free_pages(gfp_mask); + current->flags &= ~PF_MEMALLOC; -#define RMQUEUE(order, gfp_mask) \ -do { struct free_area_struct * area = free_area+order; \ - unsigned long new_order = order; \ - do { struct page *prev = memory_head(area), *ret = prev->next; \ - while (memory_head(area) != ret) { \ - if (!(gfp_mask & __GFP_DMA) || CAN_DMA(ret)) { \ - unsigned long map_nr; \ - (prev->next = ret->next)->prev = prev; \ - map_nr = ret - mem_map; \ - 
MARK_USED(map_nr, new_order, area); \ - nr_free_pages -= 1 << order; \ - EXPAND(ret, map_nr, order, new_order, area); \ - spin_unlock_irqrestore(&page_alloc_lock,flags);\ - return ADDRESS(map_nr); \ - } \ - prev = ret; \ - ret = ret->next; \ - } \ - new_order++; area++; \ - } while (new_order < NR_MEM_LISTS); \ -} while (0) - -#define EXPAND(map,index,low,high,area) \ -do { unsigned long size = 1 << high; \ - while (high > low) { \ - area--; high--; size >>= 1; \ - add_mem_queue(area, map); \ - MARK_USED(index, high, area); \ - index += size; \ - map += size; \ - } \ - set_page_count(map, 1); \ -} while (0) + if (!freed && !(gfp_mask & (__GFP_MED | __GFP_HIGH))) + return 0; + return 1; +} -unsigned long __get_free_pages(int gfp_mask, unsigned long order) +struct page * __get_pages(int gfp_mask, unsigned long order) { unsigned long flags; + struct page *page; if (order >= NR_MEM_LISTS) goto nopage; -#ifdef ATOMIC_MEMORY_DEBUGGING - if ((gfp_mask & __GFP_WAIT) && in_interrupt()) { - static int count = 0; - if (++count < 5) { - printk("gfp called nonatomically from interrupt %p\n", - __builtin_return_address(0)); - } - goto nopage; - } -#endif + /* + * If anyone calls gfp from interrupts nonatomically then it + * will sooner or later tripped up by a schedule(). + */ /* * If this is a recursive call, we'd better * do our best to just allocate things without * further thought. 
*/ - if (!(current->flags & PF_MEMALLOC)) { - int freed; - static int low_on_memory = 0; + if (!(current->flags & PF_MEMALLOC)) + goto lowmemory; -#ifndef CONFIG_BIGMEM - if (nr_free_pages > freepages.min) { - if (!low_on_memory) - goto ok_to_allocate; - if (nr_free_pages >= freepages.high) { - low_on_memory = 0; - goto ok_to_allocate; - } - } +ok_to_allocate: + spin_lock_irqsave(&page_alloc_lock, flags); - low_on_memory = 1; -#else - static int low_on_bigmemory = 0; - - if (gfp_mask & __GFP_BIGMEM) - { - if (nr_free_pages > freepages.min) { - if (!low_on_bigmemory) - goto ok_to_allocate; - if (nr_free_pages >= freepages.high) { - low_on_bigmemory = 0; - goto ok_to_allocate; - } - } - low_on_bigmemory = 1; - } else { - if (nr_free_pages-nr_free_bigpages > freepages.min) { - if (!low_on_memory) - goto ok_to_allocate; - if (nr_free_pages-nr_free_bigpages >= freepages.high) { - low_on_memory = 0; - goto ok_to_allocate; - } - } - low_on_memory = 1; +#ifdef CONFIG_HIGHMEM + if (gfp_mask & __GFP_HIGHMEM) { + page = rmqueue(order, gfp_mask, HIGHMEM_LISTS_OFFSET); + if (page) { + nr_free_highpages -= 1 << order; + spin_unlock_irqrestore(&page_alloc_lock, flags); + goto ret; } -#endif - current->flags |= PF_MEMALLOC; - freed = try_to_free_pages(gfp_mask); - current->flags &= ~PF_MEMALLOC; - - if (!freed && !(gfp_mask & (__GFP_MED | __GFP_HIGH))) - goto nopage; } -ok_to_allocate: - spin_lock_irqsave(&page_alloc_lock, flags); -#ifdef CONFIG_BIGMEM - RMQUEUEBIG(order, gfp_mask); #endif - RMQUEUE(order, gfp_mask); + page = rmqueue(order, gfp_mask, 0); spin_unlock_irqrestore(&page_alloc_lock, flags); + if (page) + goto ret; /* * If we can schedule, do so, and make sure to yield. 
@@ -320,7 +317,28 @@ ok_to_allocate: } nopage: - return 0; + return NULL; + +lowmemory: + if (balance_lowmemory(gfp_mask)) + goto ok_to_allocate; + goto nopage; +ret: + return page; +} + +unsigned long __get_free_pages(int gfp_mask, unsigned long order) +{ + struct page *page; + page = __get_pages(gfp_mask, order); + if (!page) + return 0; + return page_address(page); +} + +struct page * get_free_highpage(int gfp_mask) +{ + return __get_pages(gfp_mask, 0); } /* @@ -333,33 +351,37 @@ void show_free_areas(void) unsigned long order, flags; unsigned long total = 0; - printk("Free pages: %6dkB (%6dkB BigMem)\n ( ", + printk("Free pages: %6dkB (%6ldkB HighMem)\n ( ", nr_free_pages<<(PAGE_SHIFT-10), - nr_free_bigpages<<(PAGE_SHIFT-10)); + nr_free_highpages<<(PAGE_SHIFT-10)); printk("Free: %d, lru_cache: %d (%d %d %d)\n", nr_free_pages, nr_lru_pages, freepages.min, freepages.low, freepages.high); + spin_lock_irqsave(&page_alloc_lock, flags); - for (order=0 ; order < NR_MEM_LISTS; order++) { - struct page * tmp; + for (order = 0; order < NR_MEM_LISTS; order++) { unsigned long nr = 0; - for (tmp = free_area[order].next ; tmp != memory_head(free_area+order) ; tmp = tmp->next) { - nr ++; - } -#ifdef CONFIG_BIGMEM - for (tmp = free_area[BIGMEM_LISTS_OFFSET+order].next; - tmp != memory_head(free_area+BIGMEM_LISTS_OFFSET+order); - tmp = tmp->next) { - nr ++; + struct list_head *head, *curr; + struct page *page; + + head = &free_area[order].free_list; + for (curr = memlist_next(head); curr != head; curr = memlist_next(curr)) { + page = memlist_entry(curr, struct page, list); + nr++; } +#ifdef CONFIG_HIGHMEM + head = &free_area[order+HIGHMEM_LISTS_OFFSET].free_list; + for (curr = memlist_next(head); curr != head; curr = memlist_next(curr)) + nr++; #endif total += nr * ((PAGE_SIZE>>10) << order); printk("%lu*%lukB ", nr, (unsigned long)((PAGE_SIZE>>10) << order)); } spin_unlock_irqrestore(&page_alloc_lock, flags); + printk("= %lukB)\n", total); #ifdef SWAP_CACHE_INFO 
show_swap_cache_info(); @@ -374,11 +396,13 @@ void show_free_areas(void) * - mark all memory queues empty * - clear the memory bitmaps */ -unsigned long __init free_area_init(unsigned long start_mem, unsigned long end_mem) +volatile int data; +void __init free_area_init(unsigned long end_mem_pages) { mem_map_t * p; - unsigned long mask = PAGE_MASK; + unsigned long mask = -1; unsigned long i; + unsigned long map_size; /* * Select nr of pages we try to keep free for important stuff @@ -387,7 +411,7 @@ unsigned long __init free_area_init(unsigned long start_mem, unsigned long end_m * This is fairly arbitrary, but based on some behaviour * analysis. */ - i = (end_mem - PAGE_OFFSET) >> (PAGE_SHIFT+7); + i = end_mem_pages >> 7; if (i < 10) i = 10; if (i > 256) @@ -395,36 +419,48 @@ unsigned long __init free_area_init(unsigned long start_mem, unsigned long end_m freepages.min = i; freepages.low = i * 2; freepages.high = i * 3; - mem_map = (mem_map_t *) LONG_ALIGN(start_mem); - p = mem_map + MAP_NR(end_mem); - start_mem = LONG_ALIGN((unsigned long) p); - memset(mem_map, 0, start_mem - (unsigned long) mem_map); - do { - --p; + + /* + * Most architectures just pick 'start_mem'. Some architectures + * (with lots of mem and discontinous memory maps) have to search + * for a good area. + */ + map_size = end_mem_pages*sizeof(struct page); + mem_map = (struct page *) alloc_bootmem(map_size); + memset(mem_map, 0, map_size); + + /* + * Initially all pages are reserved - free ones are freed + * up by free_all_bootmem() once the early boot process is + * done. 
+ */ + for (p = mem_map; p < mem_map + end_mem_pages; p++) { set_page_count(p, 0); - p->flags = (1 << PG_DMA) | (1 << PG_reserved); + p->flags = (1 << PG_DMA); + SetPageReserved(p); init_waitqueue_head(&p->wait); - } while (p > mem_map); - + memlist_init(&p->list); + } + for (i = 0 ; i < NR_MEM_LISTS ; i++) { unsigned long bitmap_size; - init_mem_queue(free_area+i); -#ifdef CONFIG_BIGMEM - init_mem_queue(free_area+BIGMEM_LISTS_OFFSET+i); + unsigned int * map; + memlist_init(&(free_area+i)->free_list); +#ifdef CONFIG_HIGHMEM + memlist_init(&(free_area+HIGHMEM_LISTS_OFFSET+i)->free_list); #endif mask += mask; - end_mem = (end_mem + ~mask) & mask; - bitmap_size = (end_mem - PAGE_OFFSET) >> (PAGE_SHIFT + i); + end_mem_pages = (end_mem_pages + ~mask) & mask; + bitmap_size = end_mem_pages >> i; bitmap_size = (bitmap_size + 7) >> 3; bitmap_size = LONG_ALIGN(bitmap_size); - free_area[i].map = (unsigned int *) start_mem; - memset((void *) start_mem, 0, bitmap_size); - start_mem += bitmap_size; -#ifdef CONFIG_BIGMEM - free_area[BIGMEM_LISTS_OFFSET+i].map = (unsigned int *) start_mem; - memset((void *) start_mem, 0, bitmap_size); - start_mem += bitmap_size; + map = (unsigned int *) alloc_bootmem(bitmap_size); + free_area[i].map = map; + memset((void *) map, 0, bitmap_size); +#ifdef CONFIG_HIGHMEM + map = (unsigned int *) alloc_bootmem(bitmap_size); + free_area[HIGHMEM_LISTS_OFFSET+i].map = map; + memset((void *) map, 0, bitmap_size); #endif } - return start_mem; } diff --git a/mm/page_io.c b/mm/page_io.c index 97516e77dcfa..3ce1a186c4e1 100644 --- a/mm/page_io.c +++ b/mm/page_io.c @@ -33,7 +33,7 @@ * that shared pages stay shared while being swapped. 
*/ -static int rw_swap_page_base(int rw, unsigned long entry, struct page *page, int wait) +static int rw_swap_page_base(int rw, pte_t entry, struct page *page, int wait) { unsigned long type, offset; struct swap_info_struct * p; @@ -42,13 +42,6 @@ static int rw_swap_page_base(int rw, unsigned long entry, struct page *page, int kdev_t dev = 0; int block_size; -#ifdef DEBUG_SWAP - printk ("DebugVM: %s_swap_page entry %08lx, page %p (count %d), %s\n", - (rw == READ) ? "read" : "write", - entry, (char *) page_address(page), page_count(page), - wait ? "wait" : "nowait"); -#endif - type = SWP_TYPE(entry); if (type >= nr_swapfiles) { printk("Internal error: bad swap-device\n"); @@ -66,9 +59,7 @@ static int rw_swap_page_base(int rw, unsigned long entry, struct page *page, int return 0; } if (p->swap_map && !p->swap_map[offset]) { - printk(KERN_ERR "rw_swap_page: " - "Trying to %s unallocated swap (%08lx)\n", - (rw == READ) ? "read" : "write", entry); + pte_ERROR(entry); return 0; } if (!(p->flags & SWP_USED)) { @@ -127,12 +118,6 @@ static int rw_swap_page_base(int rw, unsigned long entry, struct page *page, int if (page_count(page) == 0) printk(KERN_ERR "rw_swap_page: page unused while waiting!\n"); -#ifdef DEBUG_SWAP - printk ("DebugVM: %s_swap_page finished on page %p (count %d)\n", - (rw == READ) ? "read" : "write", - (char *) page_address(page), - page_count(page)); -#endif return 1; } @@ -145,7 +130,7 @@ static int rw_swap_page_base(int rw, unsigned long entry, struct page *page, int */ void rw_swap_page(int rw, struct page *page, int wait) { - unsigned long entry = page->offset; + pte_t entry = get_pagecache_pte(page); if (!PageLocked(page)) PAGE_BUG(page); @@ -162,7 +147,7 @@ void rw_swap_page(int rw, struct page *page, int wait) * Therefore we can't use it. Later when we can remove the need for the * lock map and we can reduce the number of functions exported. 
*/ -void rw_swap_page_nolock(int rw, unsigned long entry, char *buf, int wait) +void rw_swap_page_nolock(int rw, pte_t entry, char *buf, int wait) { struct page *page = mem_map + MAP_NR(buf); diff --git a/mm/slab.c b/mm/slab.c index 0776b3ad0268..23f88618f089 100644 --- a/mm/slab.c +++ b/mm/slab.c @@ -317,10 +317,10 @@ static int slab_break_gfp_order = SLAB_BREAK_GFP_ORDER_LO; * slab an obj belongs to. With kmalloc(), and kfree(), these are used * to find the cache which an obj belongs to. */ -#define SLAB_SET_PAGE_CACHE(pg, x) ((pg)->next = (struct page *)(x)) -#define SLAB_GET_PAGE_CACHE(pg) ((kmem_cache_t *)(pg)->next) -#define SLAB_SET_PAGE_SLAB(pg, x) ((pg)->prev = (struct page *)(x)) -#define SLAB_GET_PAGE_SLAB(pg) ((kmem_slab_t *)(pg)->prev) +#define SLAB_SET_PAGE_CACHE(pg,x) ((pg)->list.next = (struct list_head *)(x)) +#define SLAB_GET_PAGE_CACHE(pg) ((kmem_cache_t *)(pg)->list.next) +#define SLAB_SET_PAGE_SLAB(pg,x) ((pg)->list.prev = (struct list_head *)(x)) +#define SLAB_GET_PAGE_SLAB(pg) ((kmem_slab_t *)(pg)->list.prev) /* Size description struct for general caches. */ typedef struct cache_sizes { @@ -402,7 +402,7 @@ static kmem_cache_t *cache_slabp = NULL; static unsigned long bufctl_limit = 0; /* Initialisation - setup the `cache' cache. */ -long __init kmem_cache_init(long start, long end) +void __init kmem_cache_init(void) { size_t size, i; @@ -450,7 +450,6 @@ long __init kmem_cache_init(long start, long end) */ if (num_physpages > (32 << 20) >> PAGE_SHIFT) slab_break_gfp_order = SLAB_BREAK_GFP_ORDER_HI; - return start; } /* Initialisation - setup remaining internal and general caches. 
diff --git a/mm/swap_state.c b/mm/swap_state.c index 3b3a65a715ed..0a78127f2c98 100644 --- a/mm/swap_state.c +++ b/mm/swap_state.c @@ -67,25 +67,16 @@ void show_swap_cache_info(void) } #endif -void add_to_swap_cache(struct page *page, unsigned long entry) +void add_to_swap_cache(struct page *page, pte_t entry) { #ifdef SWAP_CACHE_INFO swap_cache_add_total++; #endif -#ifdef DEBUG_SWAP - printk("DebugVM: add_to_swap_cache(%08lx count %d, entry %08lx)\n", - page_address(page), page_count(page), entry); -#endif - if (PageTestandSetSwapCache(page)) { - printk(KERN_ERR "swap_cache: replacing non-empty entry %08lx " - "on page %08lx\n", - page->offset, page_address(page)); - } - if (page->inode) { - printk(KERN_ERR "swap_cache: replacing page-cached entry " - "on page %08lx\n", page_address(page)); - } - add_to_page_cache(page, &swapper_inode, entry); + if (PageTestandSetSwapCache(page)) + BUG(); + if (page->inode) + BUG(); + add_to_page_cache(page, &swapper_inode, pte_val(entry)); } /* @@ -94,13 +85,13 @@ void add_to_swap_cache(struct page *page, unsigned long entry) * Note: if swap_map[] reaches SWAP_MAP_MAX the entries are treated as * "permanent", but will be reclaimed by the next swapoff. 
*/ -int swap_duplicate(unsigned long entry) +int swap_duplicate(pte_t entry) { struct swap_info_struct * p; unsigned long offset, type; int result = 0; - if (!entry) + if (!pte_val(entry)) goto out; type = SWP_TYPE(entry); if (type & SHM_SWP_TYPE) @@ -121,41 +112,32 @@ int swap_duplicate(unsigned long entry) else { static int overflow = 0; if (overflow++ < 5) - printk(KERN_WARNING - "swap_duplicate: entry %08lx map count=%d\n", - entry, p->swap_map[offset]); + pte_ERROR(entry); p->swap_map[offset] = SWAP_MAP_MAX; } result = 1; -#ifdef DEBUG_SWAP - printk("DebugVM: swap_duplicate(entry %08lx, count now %d)\n", - entry, p->swap_map[offset]); -#endif out: return result; bad_file: - printk(KERN_ERR - "swap_duplicate: entry %08lx, nonexistent swap file\n", entry); + pte_ERROR(entry); goto out; bad_offset: - printk(KERN_ERR - "swap_duplicate: entry %08lx, offset exceeds max\n", entry); + pte_ERROR(entry); goto out; bad_unused: - printk(KERN_ERR - "swap_duplicate at %8p: entry %08lx, unused page\n", - __builtin_return_address(0), entry); + pte_ERROR(entry); goto out; } -int swap_count(unsigned long entry) +int swap_count(struct page *page) { struct swap_info_struct * p; unsigned long offset, type; + pte_t entry = get_pagecache_pte(page); int retval = 0; - if (!entry) + if (!pte_val(entry)) goto bad_entry; type = SWP_TYPE(entry); if (type & SHM_SWP_TYPE) @@ -169,10 +151,6 @@ int swap_count(unsigned long entry) if (!p->swap_map[offset]) goto bad_unused; retval = p->swap_map[offset]; -#ifdef DEBUG_SWAP - printk("DebugVM: swap_count(entry %08lx, count %d)\n", - entry, retval); -#endif out: return retval; @@ -180,17 +158,13 @@ bad_entry: printk(KERN_ERR "swap_count: null entry!\n"); goto out; bad_file: - printk(KERN_ERR - "swap_count: entry %08lx, nonexistent swap file!\n", entry); + pte_ERROR(entry); goto out; bad_offset: - printk(KERN_ERR - "swap_count: entry %08lx, offset exceeds max!\n", entry); + pte_ERROR(entry); goto out; bad_unused: - printk(KERN_ERR - "swap_count at 
%8p: entry %08lx, unused page!\n", - __builtin_return_address(0), entry); + pte_ERROR(entry); goto out; } @@ -198,22 +172,13 @@ static inline void remove_from_swap_cache(struct page *page) { struct inode *inode = page->inode; - if (!inode) { - printk ("VM: Removing swap cache page with zero inode hash " - "on page %08lx\n", page_address(page)); - return; - } - if (inode != &swapper_inode) { - printk ("VM: Removing swap cache page with wrong inode hash " - "on page %08lx\n", page_address(page)); - } + if (!inode) + BUG(); + if (inode != &swapper_inode) + BUG(); if (!PageSwapCache(page)) PAGE_BUG(page); -#ifdef DEBUG_SWAP - printk("DebugVM: remove_from_swap_cache(%08lx count %d)\n", - page_address(page), page_count(page)); -#endif PageClearSwapCache(page); remove_inode_page(page); } @@ -224,19 +189,14 @@ static inline void remove_from_swap_cache(struct page *page) */ void __delete_from_swap_cache(struct page *page) { - long entry = page->offset; + pte_t entry = get_pagecache_pte(page); #ifdef SWAP_CACHE_INFO swap_cache_del_total++; #endif -#ifdef DEBUG_SWAP - printk("DebugVM: delete_from_swap_cache(%08lx count %d, " - "entry %08lx)\n", - page_address(page), page_count(page), entry); -#endif - remove_from_swap_cache (page); + remove_from_swap_cache(page); lock_kernel(); - swap_free (entry); + swap_free(entry); unlock_kernel(); } @@ -268,10 +228,8 @@ void delete_from_swap_cache(struct page *page) * this page if it is the last user of the page. */ -void free_page_and_swap_cache(unsigned long addr) +void free_page_and_swap_cache(struct page *page) { - struct page *page = mem_map + MAP_NR(addr); - /* * If we are the only user, then free up the swap cache. */ @@ -295,7 +253,7 @@ void free_page_and_swap_cache(unsigned long addr) * lock before returning. 
*/ -struct page * lookup_swap_cache(unsigned long entry) +struct page * lookup_swap_cache(pte_t entry) { struct page *found; @@ -303,7 +261,10 @@ struct page * lookup_swap_cache(unsigned long entry) swap_cache_find_total++; #endif while (1) { - found = find_lock_page(&swapper_inode, entry); + /* + * Right now the pagecache is 32-bit only. + */ + found = find_lock_page(&swapper_inode, pte_val(entry)); if (!found) return 0; if (found->inode != &swapper_inode || !PageSwapCache(found)) @@ -331,15 +292,11 @@ out_bad: * the swap entry is no longer in use. */ -struct page * read_swap_cache_async(unsigned long entry, int wait) +struct page * read_swap_cache_async(pte_t entry, int wait) { struct page *found_page = 0, *new_page; unsigned long new_page_addr; -#ifdef DEBUG_SWAP - printk("DebugVM: read_swap_cache_async entry %08lx%s\n", - entry, wait ? ", wait" : ""); -#endif /* * Make sure the swap entry is still in use. */ @@ -368,11 +325,6 @@ struct page * read_swap_cache_async(unsigned long entry, int wait) */ add_to_swap_cache(new_page, entry); rw_swap_page(READ, new_page, wait); -#ifdef DEBUG_SWAP - printk("DebugVM: read_swap_cache_async created " - "entry %08lx at %p\n", - entry, (char *) page_address(new_page)); -#endif return new_page; out_free_page: diff --git a/mm/swapfile.c b/mm/swapfile.c index c4ce5377d5eb..76aea7b7e2b1 100644 --- a/mm/swapfile.c +++ b/mm/swapfile.c @@ -81,17 +81,18 @@ static inline int scan_swap_map(struct swap_info_struct *si) return 0; } -unsigned long get_swap_page(void) +pte_t get_swap_page(void) { struct swap_info_struct * p; - unsigned long offset, entry; + unsigned long offset; + pte_t entry = __pte(0); int type, wrapped = 0; type = swap_list.next; if (type < 0) - return 0; + goto out; if (nr_swap_pages == 0) - return 0; + goto out; while (1) { p = &swap_info[type]; @@ -106,7 +107,7 @@ unsigned long get_swap_page(void) } else { swap_list.next = type; } - return entry; + goto out; } } type = p->next; @@ -115,19 +116,21 @@ unsigned long 
get_swap_page(void) type = swap_list.head; wrapped = 1; } - } else if (type < 0) { - return 0; /* out of swap space */ - } + } else + if (type < 0) + goto out; /* out of swap space */ } +out: + return entry; } -void swap_free(unsigned long entry) +void swap_free(pte_t entry) { struct swap_info_struct * p; unsigned long offset, type; - if (!entry) + if (!pte_val(entry)) goto out; type = SWP_TYPE(entry); @@ -154,10 +157,6 @@ void swap_free(unsigned long entry) nr_swap_pages++; } } -#ifdef DEBUG_SWAP - printk("DebugVM: swap_free(entry %08lx, count now %d)\n", - entry, p->swap_map[offset]); -#endif out: return; @@ -171,24 +170,24 @@ bad_offset: printk("swap_free: offset exceeds max\n"); goto out; bad_free: - printk("swap_free: swap-space map bad (entry %08lx)\n",entry); + pte_ERROR(entry); goto out; } /* needs the big kernel lock */ -unsigned long acquire_swap_entry(struct page *page) +pte_t acquire_swap_entry(struct page *page) { struct swap_info_struct * p; unsigned long offset, type; - unsigned long entry; + pte_t entry; if (!test_bit(PG_swap_entry, &page->flags)) goto new_swap_entry; /* We have the old entry in the page offset still */ - entry = page->offset; - if (!entry) + if (!page->offset) goto new_swap_entry; + entry = get_pagecache_pte(page); type = SWP_TYPE(entry); if (type & SHM_SWP_TYPE) goto new_swap_entry; @@ -223,7 +222,7 @@ new_swap_entry: * what to do if a write is requested later. 
*/ static inline void unuse_pte(struct vm_area_struct * vma, unsigned long address, - pte_t *dir, unsigned long entry, unsigned long page) + pte_t *dir, pte_t entry, struct page* page) { pte_t pte = *dir; @@ -239,7 +238,7 @@ static inline void unuse_pte(struct vm_area_struct * vma, unsigned long address, set_pte(dir, pte_mkdirty(pte)); return; } - if (pte_val(pte) != entry) + if (pte_val(pte) != pte_val(entry)) return; set_pte(dir, pte_mkdirty(mk_pte(page, vma->vm_page_prot))); swap_free(entry); @@ -249,7 +248,7 @@ static inline void unuse_pte(struct vm_area_struct * vma, unsigned long address, static inline void unuse_pmd(struct vm_area_struct * vma, pmd_t *dir, unsigned long address, unsigned long size, unsigned long offset, - unsigned long entry, unsigned long page) + pte_t entry, struct page* page) { pte_t * pte; unsigned long end; @@ -257,7 +256,7 @@ static inline void unuse_pmd(struct vm_area_struct * vma, pmd_t *dir, if (pmd_none(*dir)) return; if (pmd_bad(*dir)) { - printk("unuse_pmd: bad pmd (%08lx)\n", pmd_val(*dir)); + pmd_ERROR(*dir); pmd_clear(dir); return; } @@ -271,12 +270,12 @@ static inline void unuse_pmd(struct vm_area_struct * vma, pmd_t *dir, unuse_pte(vma, offset+address-vma->vm_start, pte, entry, page); address += PAGE_SIZE; pte++; - } while (address < end); + } while (address && (address < end)); } static inline void unuse_pgd(struct vm_area_struct * vma, pgd_t *dir, unsigned long address, unsigned long size, - unsigned long entry, unsigned long page) + pte_t entry, struct page* page) { pmd_t * pmd; unsigned long offset, end; @@ -284,7 +283,7 @@ static inline void unuse_pgd(struct vm_area_struct * vma, pgd_t *dir, if (pgd_none(*dir)) return; if (pgd_bad(*dir)) { - printk("unuse_pgd: bad pgd (%08lx)\n", pgd_val(*dir)); + pgd_ERROR(*dir); pgd_clear(dir); return; } @@ -294,28 +293,32 @@ static inline void unuse_pgd(struct vm_area_struct * vma, pgd_t *dir, end = address + size; if (end > PGDIR_SIZE) end = PGDIR_SIZE; + if (address >= end) + 
BUG(); do { unuse_pmd(vma, pmd, address, end - address, offset, entry, page); address = (address + PMD_SIZE) & PMD_MASK; pmd++; - } while (address < end); + } while (address && (address < end)); } static void unuse_vma(struct vm_area_struct * vma, pgd_t *pgdir, - unsigned long entry, unsigned long page) + pte_t entry, struct page* page) { unsigned long start = vma->vm_start, end = vma->vm_end; - while (start < end) { + if (start >= end) + BUG(); + do { unuse_pgd(vma, pgdir, start, end - start, entry, page); start = (start + PGDIR_SIZE) & PGDIR_MASK; pgdir++; - } + } while (start && (start < end)); } -static void unuse_process(struct mm_struct * mm, unsigned long entry, - unsigned long page) +static void unuse_process(struct mm_struct * mm, + pte_t entry, struct page* page) { struct vm_area_struct* vma; @@ -340,8 +343,8 @@ static int try_to_unuse(unsigned int type) { struct swap_info_struct * si = &swap_info[type]; struct task_struct *p; - struct page *page_map; - unsigned long entry, page; + struct page *page; + pte_t entry; int i; while (1) { @@ -361,8 +364,8 @@ static int try_to_unuse(unsigned int type) /* Get a page for the entry, using the existing swap cache page if there is one. Otherwise, get a clean page and read the swap into it. */ - page_map = read_swap_cache(entry); - if (!page_map) { + page = read_swap_cache(entry); + if (!page) { /* * Continue searching if the entry became unused. */ @@ -370,7 +373,6 @@ static int try_to_unuse(unsigned int type) continue; return -ENOMEM; } - page = page_address(page_map); read_lock(&tasklist_lock); for_each_task(p) unuse_process(p->mm, entry, page); @@ -378,17 +380,15 @@ static int try_to_unuse(unsigned int type) shm_unuse(entry, page); /* Now get rid of the extra reference to the temporary page we've been using. 
*/ - if (PageSwapCache(page_map)) - delete_from_swap_cache(page_map); - __free_page(page_map); + if (PageSwapCache(page)) + delete_from_swap_cache(page); + __free_page(page); /* * Check for and clear any overflowed swap map counts. */ if (si->swap_map[i] != 0) { if (si->swap_map[i] != SWAP_MAP_MAX) - printk(KERN_ERR - "try_to_unuse: entry %08lx count=%d\n", - entry, si->swap_map[i]); + pte_ERROR(entry); si->swap_map[i] = 0; nr_swap_pages++; } diff --git a/mm/vmalloc.c b/mm/vmalloc.c index 889fabe21c7a..ebe589d856d0 100644 --- a/mm/vmalloc.c +++ b/mm/vmalloc.c @@ -20,7 +20,7 @@ static inline void free_area_pte(pmd_t * pmd, unsigned long address, unsigned lo if (pmd_none(*pmd)) return; if (pmd_bad(*pmd)) { - printk("free_area_pte: bad pmd (%08lx)\n", pmd_val(*pmd)); + pmd_ERROR(*pmd); pmd_clear(pmd); return; } @@ -29,7 +29,7 @@ static inline void free_area_pte(pmd_t * pmd, unsigned long address, unsigned lo end = address + size; if (end > PMD_SIZE) end = PMD_SIZE; - while (address < end) { + do { pte_t page = *pte; pte_clear(pte); address += PAGE_SIZE; @@ -37,11 +37,11 @@ static inline void free_area_pte(pmd_t * pmd, unsigned long address, unsigned lo if (pte_none(page)) continue; if (pte_present(page)) { - free_page(pte_page(page)); + __free_page(mem_map+pte_pagenr(page)); continue; } printk("Whee.. 
Swapped out page in kernel page table\n"); - } + } while (address < end); } static inline void free_area_pmd(pgd_t * dir, unsigned long address, unsigned long size) @@ -52,7 +52,7 @@ static inline void free_area_pmd(pgd_t * dir, unsigned long address, unsigned lo if (pgd_none(*dir)) return; if (pgd_bad(*dir)) { - printk("free_area_pmd: bad pgd (%08lx)\n", pgd_val(*dir)); + pgd_ERROR(*dir); pgd_clear(dir); return; } @@ -61,11 +61,11 @@ static inline void free_area_pmd(pgd_t * dir, unsigned long address, unsigned lo end = address + size; if (end > PGDIR_SIZE) end = PGDIR_SIZE; - while (address < end) { + do { free_area_pte(pmd, address, end - address); address = (address + PMD_SIZE) & PMD_MASK; pmd++; - } + } while (address < end); } void vmfree_area_pages(unsigned long address, unsigned long size) @@ -75,11 +75,11 @@ void vmfree_area_pages(unsigned long address, unsigned long size) dir = pgd_offset_k(address); flush_cache_all(); - while (address < end) { + do { free_area_pmd(dir, address, end - address); address = (address + PGDIR_SIZE) & PGDIR_MASK; dir++; - } + } while (address && (address < end)); flush_tlb_all(); } @@ -91,17 +91,17 @@ static inline int alloc_area_pte(pte_t * pte, unsigned long address, unsigned lo end = address + size; if (end > PMD_SIZE) end = PMD_SIZE; - while (address < end) { - unsigned long page; + do { + struct page * page; if (!pte_none(*pte)) printk("alloc_area_pte: page already exists\n"); - page = __get_free_page(GFP_KERNEL|GFP_BIGMEM); + page = get_free_highpage(GFP_KERNEL|__GFP_HIGHMEM); if (!page) return -ENOMEM; set_pte(pte, mk_pte(page, PAGE_KERNEL)); address += PAGE_SIZE; pte++; - } + } while (address < end); return 0; } @@ -113,7 +113,7 @@ static inline int alloc_area_pmd(pmd_t * pmd, unsigned long address, unsigned lo end = address + size; if (end > PGDIR_SIZE) end = PGDIR_SIZE; - while (address < end) { + do { pte_t * pte = pte_alloc_kernel(pmd, address); if (!pte) return -ENOMEM; @@ -121,7 +121,7 @@ static inline int 
alloc_area_pmd(pmd_t * pmd, unsigned long address, unsigned lo return -ENOMEM; address = (address + PMD_SIZE) & PMD_MASK; pmd++; - } + } while (address < end); return 0; } @@ -132,7 +132,7 @@ int vmalloc_area_pages(unsigned long address, unsigned long size) dir = pgd_offset_k(address); flush_cache_all(); - while (address < end) { + do { pmd_t *pmd; pgd_t olddir = *dir; @@ -145,7 +145,7 @@ int vmalloc_area_pages(unsigned long address, unsigned long size) set_pgdir(address, *dir); address = (address + PGDIR_SIZE) & PGDIR_MASK; dir++; - } + } while (address && (address < end)); flush_tlb_all(); return 0; } @@ -202,14 +202,19 @@ void * vmalloc(unsigned long size) struct vm_struct *area; size = PAGE_ALIGN(size); - if (!size || size > (max_mapnr << PAGE_SHIFT)) + if (!size || size > (max_mapnr << PAGE_SHIFT)) { + BUG(); return NULL; + } area = get_vm_area(size); - if (!area) + if (!area) { + BUG(); return NULL; + } addr = area->addr; if (vmalloc_area_pages(VMALLOC_VMADDR(addr), size)) { vfree(addr); + BUG(); return NULL; } return addr; diff --git a/mm/vmscan.c b/mm/vmscan.c index 31b00047a2a3..83d987a9f0e5 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c @@ -17,7 +17,7 @@ #include #include #include -#include +#include #include @@ -34,20 +34,16 @@ */ static int try_to_swap_out(struct vm_area_struct* vma, unsigned long address, pte_t * page_table, int gfp_mask) { - pte_t pte; - unsigned long entry; - unsigned long page_addr; + pte_t pte, entry; struct page * page; pte = *page_table; if (!pte_present(pte)) goto out_failed; - page_addr = pte_page(pte); - if (MAP_NR(page_addr) >= max_mapnr) + page = pte_page(pte); + if (page-mem_map >= max_mapnr) goto out_failed; - page = mem_map + MAP_NR(page_addr); - /* Don't look at this pte if it's been accessed recently. 
*/ if (pte_young(pte)) { /* @@ -62,7 +58,7 @@ static int try_to_swap_out(struct vm_area_struct* vma, unsigned long address, pt if (PageReserved(page) || PageLocked(page) || ((gfp_mask & __GFP_DMA) && !PageDMA(page)) - || (!(gfp_mask & __GFP_BIGMEM) && PageBIGMEM(page))) + || (!(gfp_mask & __GFP_HIGHMEM) && PageHighMem(page))) goto out_failed; /* @@ -74,9 +70,9 @@ static int try_to_swap_out(struct vm_area_struct* vma, unsigned long address, pt * memory, and we should just continue our scan. */ if (PageSwapCache(page)) { - entry = page->offset; + entry = get_pagecache_pte(page); swap_duplicate(entry); - set_pte(page_table, __pte(entry)); + set_pte(page_table, entry); drop_pte: vma->vm_mm->rss--; flush_tlb_page(vma, address); @@ -150,14 +146,14 @@ drop_pte: * page with that swap entry. */ entry = acquire_swap_entry(page); - if (!entry) + if (!pte_val(entry)) goto out_failed; /* No swap space left */ - if (!(page = prepare_bigmem_swapout(page))) + if (!(page = prepare_highmem_swapout(page))) goto out_swap_free; vma->vm_mm->rss--; - set_pte(page_table, __pte(entry)); + set_pte(page_table, entry); vmlist_access_unlock(vma->vm_mm); flush_tlb_page(vma, address); @@ -201,7 +197,7 @@ static inline int swap_out_pmd(struct vm_area_struct * vma, pmd_t *dir, unsigned if (pmd_none(*dir)) return 0; if (pmd_bad(*dir)) { - printk("swap_out_pmd: bad pmd (%08lx)\n", pmd_val(*dir)); + pmd_ERROR(*dir); pmd_clear(dir); return 0; } @@ -220,7 +216,7 @@ static inline int swap_out_pmd(struct vm_area_struct * vma, pmd_t *dir, unsigned return result; address += PAGE_SIZE; pte++; - } while (address < end); + } while (address && (address < end)); return 0; } @@ -232,7 +228,7 @@ static inline int swap_out_pgd(struct vm_area_struct * vma, pgd_t *dir, unsigned if (pgd_none(*dir)) return 0; if (pgd_bad(*dir)) { - printk("swap_out_pgd: bad pgd (%08lx)\n", pgd_val(*dir)); + pgd_ERROR(*dir); pgd_clear(dir); return 0; } @@ -240,7 +236,7 @@ static inline int swap_out_pgd(struct vm_area_struct * vma, 
pgd_t *dir, unsigned pmd = pmd_offset(dir, address); pgd_end = (address + PGDIR_SIZE) & PGDIR_MASK; - if (end > pgd_end) + if (pgd_end && (end > pgd_end)) end = pgd_end; do { @@ -249,7 +245,7 @@ static inline int swap_out_pgd(struct vm_area_struct * vma, pgd_t *dir, unsigned return result; address = (address + PMD_SIZE) & PMD_MASK; pmd++; - } while (address < end); + } while (address && (address < end)); return 0; } @@ -265,13 +261,15 @@ static int swap_out_vma(struct vm_area_struct * vma, unsigned long address, int pgdir = pgd_offset(vma->vm_mm, address); end = vma->vm_end; - while (address < end) { + if (address >= end) + BUG(); + do { int result = swap_out_pgd(vma, pgdir, address, end, gfp_mask); if (result) return result; address = (address + PGDIR_SIZE) & PGDIR_MASK; pgdir++; - } + } while (address && (address < end)); return 0; } @@ -498,8 +496,8 @@ int kswapd(void *unused) */ do { /* kswapd is critical to provide GFP_ATOMIC - allocations (not GFP_BIGMEM ones). */ - if (nr_free_pages - nr_free_bigpages >= freepages.high) + allocations (not GFP_HIGHMEM ones). */ + if (nr_free_pages - nr_free_highpages >= freepages.high) break; if (!do_try_to_free_pages(GFP_KSWAPD)) -- 2.39.5