on the Alpha. The only time you would ever not say Y is to say M in
order to debug the code. Say Y unless you know what you are doing.
-Support for over 1Gig of memory
-CONFIG_BIGMEM
- Linux can use up to 1 Gigabytes (= 2^30 bytes) of physical memory.
- If you are compiling a kernel which will never run on a machine with
- more than 1 Gigabyte, answer N here. Otherwise, say Y.
-
- The actual amount of physical memory may need to be specified using a
- kernel command line option such as "mem=256M". (Try "man bootparam"
- or see the documentation of your boot loader (lilo or loadlin) about
- how to pass options to the kernel at boot time. The lilo procedure
- is also explained in the SCSI-HOWTO, available from
- http://metalab.unc.edu/mdw/linux.html#howto .)
+High Memory support
+CONFIG_NOHIGHMEM
+ If you are compiling a kernel which will never run on a machine
+ with more than 1 Gigabyte total physical RAM, answer "off"
+ here (default choice).
+
+ Linux can use up to 64 Gigabytes of physical memory on x86 systems.
+ High memory is all the physical RAM that the kernel cannot map
+ directly - e.g. 3GB of high memory if there is 4GB RAM in the
+ system, 7GB if there is 8GB.
+
+ If your machine has between 1 and 4 Gigabytes of physical RAM,
+ answer "4GB" here.
+
+ If more than 4 Gigabytes is used then answer "64GB" here. This
+ selection turns Intel PAE (Physical Address Extension) mode on:
+ PAE implements 3-level paging on IA32 processors. PAE is fully
+ supported by Linux and is implemented on all recent Intel
+ processors (PPro and better). NOTE: A "64GB" kernel will not
+ boot on CPUs that do not support PAE!
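As a rough illustration of the 3-level paging mentioned above (a
standalone sketch, not kernel code - the 2/9/9/12 field widths are
PAE's defined split of a 32-bit linear address):

#include <stdio.h>

/*
 * Decompose a 32-bit linear address the way PAE's 3-level paging
 * does: 2 bits of page-directory-pointer index, 9 bits of
 * page-directory index, 9 bits of page-table index and a 12-bit
 * byte offset. Illustration only.
 */
int main(void)
{
	unsigned long vaddr = 0xc0101234UL;	/* arbitrary example */
	unsigned int pdpt = (vaddr >> 30) & 0x3;
	unsigned int pmd  = (vaddr >> 21) & 0x1ff;
	unsigned int pte  = (vaddr >> 12) & 0x1ff;
	unsigned int off  = vaddr & 0xfff;

	printf("pdpt=%u pmd=%u pte=%u offset=0x%03x\n",
	       pdpt, pmd, pte, off);
	return 0;
}

Since PAE page table entries are 64 bits wide, the extra physical
address bits in each entry are what let the kernel reach memory
beyond 4 Gigabytes.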
Normal PC floppy disk support
CONFIG_BLK_DEV_FD
CONFIG_ARCH_NETWINDER
Say Y here if you intend to run this kernel on the NetWinder.
-Maximum Physical Memory
+Virtual/Physical Memory Split
CONFIG_1GB
- Linux can use up to 2 Gigabytes (= 2^31 bytes) of physical memory.
- If you are compiling a kernel which will never run on a machine with
- more than 1 Gigabyte, answer "1GB" here. Otherwise, say "2GB".
-
- The actual amount of physical memory should be specified using a
- kernel command line option such as "mem=256M". (Try "man bootparam"
- or see the documentation of your boot loader (lilo or loadlin) about
- how to pass options to the kernel at boot time. The lilo procedure
- is also explained in the SCSI-HOWTO, available from
- http://metalab.unc.edu/mdw/linux.html#howto .)
+ If you are compiling a kernel which will never run on a machine
+ with more than 1 Gigabyte total physical RAM, answer "3GB/1GB"
+ here (default choice).
+
+ On 32-bit x86 systems Linux can use up to 64 Gigabytes of physical
+ memory. However 32-bit x86 processors have only 4 Gigabytes of
+ virtual memory space. This option specifies the maximum amount of
+ virtual memory space one process can potentially use. Certain types
+ of applications (e.g. database servers) perform better if they have
+ as much virtual memory per process as possible.
+
+ The remaining part of the 4G virtual memory space is used by the
+ kernel to 'permanently map' as much physical memory as possible.
+ Certain types of applications perform better if there is more
+ 'permanently mapped' kernel memory.
+
+ [WARNING! Certain boards do not support PCI DMA to physical addresses
+ bigger than 2 Gigabytes. Non-DMA-able memory must not be permanently
+ mapped by the kernel, thus a 1G/3G split will not work on such boxes.]
+
+ As you can see there is no 'perfect split' - the fundamental
+ problem is that 32 bits of virtual address space (4G) is in short
+ supply. So you will have to make your own choice, depending on the
+ application load of your box. A 2G/2G split is typically a good
+ choice for a generic Linux server with lots of RAM.
+
+ Whatever part of physical memory is not permanently mapped is
+ called 'high memory'. How much total high memory the kernel can
+ handle is determined by the next option, High Memory support.
+
+ The actual amount of total physical memory will either be
+ autodetected or can be forced by using a kernel command line option
+ such as "mem=256M". (Try "man bootparam" or see the documentation of
+ your boot loader (lilo or loadlin) about how to pass options to the
+ kernel at boot time. The lilo procedure is also explained in the
+ SCSI-HOWTO, available from http://metalab.unc.edu/mdw/linux.html#howto .)
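A worked example of the arithmetic behind the split (a sketch in
plain C, working in megabytes to stay within 32-bit range; the
PAGE_OFFSET value and the 128MB vmalloc reserve follow the
conventions used elsewhere in this patch, the RAM size is made up):

#include <stdio.h>

/* Mirror of the MAXMEM computation: 3072MB corresponds to the
 * 3GB/1GB split (PAGE_OFFSET == 0xc0000000). Illustration only. */
#define PAGE_OFFSET_MB	3072UL
#define VMALLOC_MB	128UL		/* reserved for vmalloc/initrd */
#define MAXMEM_MB	(4096UL - PAGE_OFFSET_MB - VMALLOC_MB)

int main(void)
{
	unsigned long ram_mb = 4096UL;	/* pretend the box has 4GB RAM */
	unsigned long low_mb = ram_mb > MAXMEM_MB ? MAXMEM_MB : ram_mb;

	printf("directly mapped: %luMB, high memory: %luMB\n",
	       low_mb, ram_mb - low_mb);
	return 0;
}

With a 3GB/1GB split and 4GB of RAM this yields 896MB of directly
mapped ("low") memory; everything above it must be handled as high
memory.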
Math emulation
CONFIG_NWFPE
# LocalWords: KERNNAME kname ktype kernelname Kerneltype KERNTYPE Alt RX mdafb
# LocalWords: dataless kerneltype SYSNAME Comtrol Rocketport palmtop fbset EGS
# LocalWords: nvram SYSRQ SysRq PrintScreen sysrq NVRAMs NvRAM Shortwave RTTY
-# LocalWords: Sitor Amtor Pactor GTOR hayes TX TMOUT JFdocs BIGMEM DAC IRQ's
+# LocalWords: Sitor Amtor Pactor GTOR hayes TX TMOUT JFdocs HIGHMEM DAC IRQ's
# LocalWords: IDEPCI IDEDMA idedma PDC pdc TRM trm raidtools luthien nuclecu
# LocalWords: unam mx miguel koobera uic EMUL solaris pp ieee lpsg co DMAs TOS
# LocalWords: BLDCONFIG preloading jumperless BOOTINIT modutils multipath GRE
define_bool CONFIG_X86_USE_3DNOW y
fi
+choice 'High Memory Support' \
+ "off CONFIG_NOHIGHMEM \
+ 4GB CONFIG_HIGHMEM4G \
+ 64GB CONFIG_HIGHMEM64G" off
+if [ "$CONFIG_HIGHMEM4G" = "y" ]; then
+ define_bool CONFIG_HIGHMEM y
+fi
+if [ "$CONFIG_HIGHMEM64G" = "y" ]; then
+ define_bool CONFIG_HIGHMEM y
+ define_bool CONFIG_X86_PAE y
+fi
+
bool 'Math emulation' CONFIG_MATH_EMULATION
bool 'MTRR (Memory Type Range Register) support' CONFIG_MTRR
bool 'Symmetric multi-processing support' CONFIG_SMP
mainmenu_option next_comment
comment 'General setup'
-bool 'Support for over 1Gig of memory' CONFIG_BIGMEM
bool 'Networking support' CONFIG_NET
bool 'SGI Visual Workstation support' CONFIG_VISWS
if [ "$CONFIG_VISWS" = "y" ]; then
CONFIG_X86_POPAD_OK=y
CONFIG_X86_TSC=y
CONFIG_X86_GOOD_APIC=y
-CONFIG_1GB=y
-# CONFIG_2GB is not set
+CONFIG_NOHIGHMEM=y
+# CONFIG_HIGHMEM4G is not set
+# CONFIG_HIGHMEM64G is not set
# CONFIG_MATH_EMULATION is not set
# CONFIG_MTRR is not set
CONFIG_SMP=y
#
# General setup
#
-# CONFIG_BIGMEM is not set
CONFIG_NET=y
# CONFIG_VISWS is not set
CONFIG_X86_IO_APIC=y
# CONFIG_BLK_DEV_OFFBOARD is not set
# CONFIG_BLK_DEV_AEC6210 is not set
CONFIG_BLK_DEV_PIIX=y
-# CONFIG_BLK_DEV_SIS5513 is not set
+# CONFIG_BLK_DEV_PIIX_TUNING is not set
# CONFIG_IDE_CHIPSETS is not set
# CONFIG_BLK_CPQ_DA is not set
.org 0x1000
ENTRY(swapper_pg_dir)
.long 0x00102007
- .fill __USER_PGD_PTRS-1,4,0
- /* default: 767 entries */
+ .long 0x00103007
+ .fill BOOT_USER_PGD_PTRS-2,4,0
+ /* default: 766 entries */
.long 0x00102007
- /* default: 255 entries */
- .fill __KERNEL_PGD_PTRS-1,4,0
+ .long 0x00103007
+ /* default: 254 entries */
+ .fill BOOT_KERNEL_PGD_PTRS-2,4,0
/*
* The page tables are initialized to only 4MB here - the final page
.long 0x3f0007,0x3f1007,0x3f2007,0x3f3007,0x3f4007,0x3f5007,0x3f6007,0x3f7007
.long 0x3f8007,0x3f9007,0x3fa007,0x3fb007,0x3fc007,0x3fd007,0x3fe007,0x3ff007
-.org 0x3000
-ENTRY(empty_bad_page)
-
+ENTRY(pg1)
+ .long 0x400007,0x001007,0x002007,0x003007,0x004007,0x005007,0x006007,0x007007
+ .long 0x408007,0x009007,0x00a007,0x00b007,0x00c007,0x00d007,0x00e007,0x00f007
+ .long 0x410007,0x011007,0x012007,0x013007,0x014007,0x015007,0x016007,0x017007
+ .long 0x418007,0x019007,0x01a007,0x01b007,0x01c007,0x01d007,0x01e007,0x01f007
+ .long 0x420007,0x021007,0x022007,0x023007,0x024007,0x025007,0x026007,0x027007
+ .long 0x428007,0x029007,0x02a007,0x02b007,0x02c007,0x02d007,0x02e007,0x02f007
+ .long 0x430007,0x031007,0x032007,0x033007,0x034007,0x035007,0x036007,0x037007
+ .long 0x438007,0x039007,0x03a007,0x03b007,0x03c007,0x03d007,0x03e007,0x03f007
+ .long 0x440007,0x041007,0x042007,0x043007,0x044007,0x045007,0x046007,0x047007
+ .long 0x448007,0x049007,0x04a007,0x04b007,0x04c007,0x04d007,0x04e007,0x04f007
+ .long 0x450007,0x051007,0x052007,0x053007,0x054007,0x055007,0x056007,0x057007
+ .long 0x458007,0x059007,0x05a007,0x05b007,0x05c007,0x05d007,0x05e007,0x05f007
+ .long 0x460007,0x061007,0x062007,0x063007,0x064007,0x065007,0x066007,0x067007
+ .long 0x468007,0x069007,0x06a007,0x06b007,0x06c007,0x06d007,0x06e007,0x06f007
+ .long 0x470007,0x071007,0x072007,0x073007,0x074007,0x075007,0x076007,0x077007
+ .long 0x478007,0x079007,0x07a007,0x07b007,0x07c007,0x07d007,0x07e007,0x07f007
+ .long 0x480007,0x081007,0x082007,0x083007,0x084007,0x085007,0x086007,0x087007
+ .long 0x488007,0x089007,0x08a007,0x08b007,0x08c007,0x08d007,0x08e007,0x08f007
+ .long 0x490007,0x091007,0x092007,0x093007,0x094007,0x095007,0x096007,0x097007
+ .long 0x498007,0x099007,0x09a007,0x09b007,0x09c007,0x09d007,0x09e007,0x09f007
+ .long 0x4a0007,0x0a1007,0x0a2007,0x0a3007,0x0a4007,0x0a5007,0x0a6007,0x0a7007
+ .long 0x4a8007,0x0a9007,0x0aa007,0x0ab007,0x0ac007,0x0ad007,0x0ae007,0x0af007
+ .long 0x4b0007,0x0b1007,0x0b2007,0x0b3007,0x0b4007,0x0b5007,0x0b6007,0x0b7007
+ .long 0x4b8007,0x0b9007,0x0ba007,0x0bb007,0x0bc007,0x0bd007,0x0be007,0x0bf007
+ .long 0x4c0007,0x0c1007,0x0c2007,0x0c3007,0x0c4007,0x0c5007,0x0c6007,0x0c7007
+ .long 0x4c8007,0x0c9007,0x0ca007,0x0cb007,0x0cc007,0x0cd007,0x0ce007,0x0cf007
+ .long 0x4d0007,0x0d1007,0x0d2007,0x0d3007,0x0d4007,0x0d5007,0x0d6007,0x0d7007
+ .long 0x4d8007,0x0d9007,0x0da007,0x0db007,0x0dc007,0x0dd007,0x0de007,0x0df007
+ .long 0x4e0007,0x0e1007,0x0e2007,0x0e3007,0x0e4007,0x0e5007,0x0e6007,0x0e7007
+ .long 0x4e8007,0x0e9007,0x0ea007,0x0eb007,0x0ec007,0x0ed007,0x0ee007,0x0ef007
+ .long 0x4f0007,0x0f1007,0x0f2007,0x0f3007,0x0f4007,0x0f5007,0x0f6007,0x0f7007
+ .long 0x4f8007,0x0f9007,0x0fa007,0x0fb007,0x0fc007,0x0fd007,0x0fe007,0x0ff007
+ .long 0x500007,0x001007,0x002007,0x003007,0x004007,0x005007,0x006007,0x007007
+ .long 0x508007,0x009007,0x00a007,0x00b007,0x00c007,0x00d007,0x00e007,0x00f007
+ .long 0x510007,0x011007,0x012007,0x013007,0x014007,0x015007,0x016007,0x017007
+ .long 0x518007,0x019007,0x01a007,0x01b007,0x01c007,0x01d007,0x01e007,0x01f007
+ .long 0x520007,0x021007,0x022007,0x023007,0x024007,0x025007,0x026007,0x027007
+ .long 0x528007,0x029007,0x02a007,0x02b007,0x02c007,0x02d007,0x02e007,0x02f007
+ .long 0x530007,0x031007,0x032007,0x033007,0x034007,0x035007,0x036007,0x037007
+ .long 0x538007,0x039007,0x03a007,0x03b007,0x03c007,0x03d007,0x03e007,0x03f007
+ .long 0x540007,0x041007,0x042007,0x043007,0x044007,0x045007,0x046007,0x047007
+ .long 0x548007,0x049007,0x04a007,0x04b007,0x04c007,0x04d007,0x04e007,0x04f007
+ .long 0x550007,0x051007,0x052007,0x053007,0x054007,0x055007,0x056007,0x057007
+ .long 0x558007,0x059007,0x05a007,0x05b007,0x05c007,0x05d007,0x05e007,0x05f007
+ .long 0x560007,0x061007,0x062007,0x063007,0x064007,0x065007,0x066007,0x067007
+ .long 0x568007,0x069007,0x06a007,0x06b007,0x06c007,0x06d007,0x06e007,0x06f007
+ .long 0x570007,0x071007,0x072007,0x073007,0x074007,0x075007,0x076007,0x077007
+ .long 0x578007,0x079007,0x07a007,0x07b007,0x07c007,0x07d007,0x07e007,0x07f007
+ .long 0x580007,0x081007,0x082007,0x083007,0x084007,0x085007,0x086007,0x087007
+ .long 0x588007,0x089007,0x08a007,0x08b007,0x08c007,0x08d007,0x08e007,0x08f007
+ .long 0x590007,0x091007,0x092007,0x093007,0x094007,0x095007,0x096007,0x097007
+ .long 0x598007,0x099007,0x09a007,0x09b007,0x09c007,0x09d007,0x09e007,0x09f007
+ .long 0x5a0007,0x0a1007,0x0a2007,0x0a3007,0x0a4007,0x0a5007,0x0a6007,0x0a7007
+ .long 0x5a8007,0x0a9007,0x0aa007,0x0ab007,0x0ac007,0x0ad007,0x0ae007,0x0af007
+ .long 0x5b0007,0x0b1007,0x0b2007,0x0b3007,0x0b4007,0x0b5007,0x0b6007,0x0b7007
+ .long 0x5b8007,0x0b9007,0x0ba007,0x0bb007,0x0bc007,0x0bd007,0x0be007,0x0bf007
+ .long 0x5c0007,0x0c1007,0x0c2007,0x0c3007,0x0c4007,0x0c5007,0x0c6007,0x0c7007
+ .long 0x5c8007,0x0c9007,0x0ca007,0x0cb007,0x0cc007,0x0cd007,0x0ce007,0x0cf007
+ .long 0x5d0007,0x0d1007,0x0d2007,0x0d3007,0x0d4007,0x0d5007,0x0d6007,0x0d7007
+ .long 0x5d8007,0x0d9007,0x0da007,0x0db007,0x0dc007,0x0dd007,0x0de007,0x0df007
+ .long 0x5e0007,0x0e1007,0x0e2007,0x0e3007,0x0e4007,0x0e5007,0x0e6007,0x0e7007
+ .long 0x5e8007,0x0e9007,0x0ea007,0x0eb007,0x0ec007,0x0ed007,0x0ee007,0x0ef007
+ .long 0x5f0007,0x0f1007,0x0f2007,0x0f3007,0x0f4007,0x0f5007,0x0f6007,0x0f7007
+ .long 0x5f8007,0x0f9007,0x0fa007,0x0fb007,0x0fc007,0x0fd007,0x0fe007,0x0ff007
+ .long 0x600007,0x001007,0x002007,0x003007,0x004007,0x005007,0x006007,0x007007
+ .long 0x608007,0x009007,0x00a007,0x00b007,0x00c007,0x00d007,0x00e007,0x00f007
+ .long 0x610007,0x011007,0x012007,0x013007,0x014007,0x015007,0x016007,0x017007
+ .long 0x618007,0x019007,0x01a007,0x01b007,0x01c007,0x01d007,0x01e007,0x01f007
+ .long 0x620007,0x021007,0x022007,0x023007,0x024007,0x025007,0x026007,0x027007
+ .long 0x628007,0x029007,0x02a007,0x02b007,0x02c007,0x02d007,0x02e007,0x02f007
+ .long 0x630007,0x031007,0x032007,0x033007,0x034007,0x035007,0x036007,0x037007
+ .long 0x638007,0x039007,0x03a007,0x03b007,0x03c007,0x03d007,0x03e007,0x03f007
+ .long 0x640007,0x041007,0x042007,0x043007,0x044007,0x045007,0x046007,0x047007
+ .long 0x648007,0x049007,0x04a007,0x04b007,0x04c007,0x04d007,0x04e007,0x04f007
+ .long 0x650007,0x051007,0x052007,0x053007,0x054007,0x055007,0x056007,0x057007
+ .long 0x658007,0x059007,0x05a007,0x05b007,0x05c007,0x05d007,0x05e007,0x05f007
+ .long 0x660007,0x061007,0x062007,0x063007,0x064007,0x065007,0x066007,0x067007
+ .long 0x668007,0x069007,0x06a007,0x06b007,0x06c007,0x06d007,0x06e007,0x06f007
+ .long 0x670007,0x071007,0x072007,0x073007,0x074007,0x075007,0x076007,0x077007
+ .long 0x678007,0x079007,0x07a007,0x07b007,0x07c007,0x07d007,0x07e007,0x07f007
+ .long 0x680007,0x081007,0x082007,0x083007,0x084007,0x085007,0x086007,0x087007
+ .long 0x688007,0x089007,0x08a007,0x08b007,0x08c007,0x08d007,0x08e007,0x08f007
+ .long 0x690007,0x091007,0x092007,0x093007,0x094007,0x095007,0x096007,0x097007
+ .long 0x698007,0x099007,0x09a007,0x09b007,0x09c007,0x09d007,0x09e007,0x09f007
+ .long 0x6a0007,0x0a1007,0x0a2007,0x0a3007,0x0a4007,0x0a5007,0x0a6007,0x0a7007
+ .long 0x6a8007,0x0a9007,0x0aa007,0x0ab007,0x0ac007,0x0ad007,0x0ae007,0x0af007
+ .long 0x6b0007,0x0b1007,0x0b2007,0x0b3007,0x0b4007,0x0b5007,0x0b6007,0x0b7007
+ .long 0x6b8007,0x0b9007,0x0ba007,0x0bb007,0x0bc007,0x0bd007,0x0be007,0x0bf007
+ .long 0x6c0007,0x0c1007,0x0c2007,0x0c3007,0x0c4007,0x0c5007,0x0c6007,0x0c7007
+ .long 0x6c8007,0x0c9007,0x0ca007,0x0cb007,0x0cc007,0x0cd007,0x0ce007,0x0cf007
+ .long 0x6d0007,0x0d1007,0x0d2007,0x0d3007,0x0d4007,0x0d5007,0x0d6007,0x0d7007
+ .long 0x6d8007,0x0d9007,0x0da007,0x0db007,0x0dc007,0x0dd007,0x0de007,0x0df007
+ .long 0x6e0007,0x0e1007,0x0e2007,0x0e3007,0x0e4007,0x0e5007,0x0e6007,0x0e7007
+ .long 0x6e8007,0x0e9007,0x0ea007,0x0eb007,0x0ec007,0x0ed007,0x0ee007,0x0ef007
+ .long 0x6f0007,0x0f1007,0x0f2007,0x0f3007,0x0f4007,0x0f5007,0x0f6007,0x0f7007
+ .long 0x6f8007,0x0f9007,0x0fa007,0x0fb007,0x0fc007,0x0fd007,0x0fe007,0x0ff007
+ .long 0x700007,0x001007,0x002007,0x003007,0x004007,0x005007,0x006007,0x007007
+ .long 0x708007,0x009007,0x00a007,0x00b007,0x00c007,0x00d007,0x00e007,0x00f007
+ .long 0x710007,0x011007,0x012007,0x013007,0x014007,0x015007,0x016007,0x017007
+ .long 0x718007,0x019007,0x01a007,0x01b007,0x01c007,0x01d007,0x01e007,0x01f007
+ .long 0x720007,0x021007,0x022007,0x023007,0x024007,0x025007,0x026007,0x027007
+ .long 0x728007,0x029007,0x02a007,0x02b007,0x02c007,0x02d007,0x02e007,0x02f007
+ .long 0x730007,0x031007,0x032007,0x033007,0x034007,0x035007,0x036007,0x037007
+ .long 0x738007,0x039007,0x03a007,0x03b007,0x03c007,0x03d007,0x03e007,0x03f007
+ .long 0x740007,0x041007,0x042007,0x043007,0x044007,0x045007,0x046007,0x047007
+ .long 0x748007,0x049007,0x04a007,0x04b007,0x04c007,0x04d007,0x04e007,0x04f007
+ .long 0x750007,0x051007,0x052007,0x053007,0x054007,0x055007,0x056007,0x057007
+ .long 0x758007,0x059007,0x05a007,0x05b007,0x05c007,0x05d007,0x05e007,0x05f007
+ .long 0x760007,0x061007,0x062007,0x063007,0x064007,0x065007,0x066007,0x067007
+ .long 0x768007,0x069007,0x06a007,0x06b007,0x06c007,0x06d007,0x06e007,0x06f007
+ .long 0x770007,0x071007,0x072007,0x073007,0x074007,0x075007,0x076007,0x077007
+ .long 0x778007,0x079007,0x07a007,0x07b007,0x07c007,0x07d007,0x07e007,0x07f007
+ .long 0x780007,0x081007,0x082007,0x083007,0x084007,0x085007,0x086007,0x087007
+ .long 0x788007,0x089007,0x08a007,0x08b007,0x08c007,0x08d007,0x08e007,0x08f007
+ .long 0x790007,0x091007,0x092007,0x093007,0x094007,0x095007,0x096007,0x097007
+ .long 0x798007,0x099007,0x09a007,0x09b007,0x09c007,0x09d007,0x09e007,0x09f007
+ .long 0x7a0007,0x0a1007,0x0a2007,0x0a3007,0x0a4007,0x0a5007,0x0a6007,0x0a7007
+ .long 0x7a8007,0x0a9007,0x0aa007,0x0ab007,0x0ac007,0x0ad007,0x0ae007,0x0af007
+ .long 0x7b0007,0x0b1007,0x0b2007,0x0b3007,0x0b4007,0x0b5007,0x0b6007,0x0b7007
+ .long 0x7b8007,0x0b9007,0x0ba007,0x0bb007,0x0bc007,0x0bd007,0x0be007,0x0bf007
+ .long 0x7c0007,0x0c1007,0x0c2007,0x0c3007,0x0c4007,0x0c5007,0x0c6007,0x0c7007
+ .long 0x7c8007,0x0c9007,0x0ca007,0x0cb007,0x0cc007,0x0cd007,0x0ce007,0x0cf007
+ .long 0x7d0007,0x0d1007,0x0d2007,0x0d3007,0x0d4007,0x0d5007,0x0d6007,0x0d7007
+ .long 0x7d8007,0x0d9007,0x0da007,0x0db007,0x0dc007,0x0dd007,0x0de007,0x0df007
+ .long 0x7e0007,0x0e1007,0x0e2007,0x0e3007,0x0e4007,0x0e5007,0x0e6007,0x0e7007
+ .long 0x7e8007,0x0e9007,0x0ea007,0x0eb007,0x0ec007,0x0ed007,0x0ee007,0x0ef007
+ .long 0x7f0007,0x0f1007,0x0f2007,0x0f3007,0x0f4007,0x0f5007,0x0f6007,0x0f7007
+ .long 0x7f8007,0x0f9007,0x0fa007,0x0fb007,0x0fc007,0x0fd007,0x0fe007,0x0ff007
.org 0x4000
-ENTRY(empty_bad_page_table)
+ENTRY(empty_zero_page)
.org 0x5000
-ENTRY(empty_zero_page)
+ENTRY(empty_bad_page)
.org 0x6000
+ENTRY(empty_bad_pte_table)
+
+#if CONFIG_X86_PAE
+
+ .org 0x7000
+ ENTRY(empty_bad_pmd_table)
+
+ .org 0x8000
+
+#else
+
+ .org 0x7000
+
+#endif
/*
* This starts the data section. Note that the above is all
* Naturally it's not a 1:1 relation, but there are similarities.
*/
+#include <linux/config.h>
#include <linux/ptrace.h>
#include <linux/errno.h>
#include <linux/signal.h>
#ifdef CONFIG_BLK_DEV_RAM
#include <linux/blk.h>
#endif
-#include <linux/bigmem.h>
+#include <linux/highmem.h>
+#include <linux/bootmem.h>
#include <asm/processor.h>
#include <linux/console.h>
#include <asm/uaccess.h>
#define LOWMEMSIZE() ((*(unsigned short *)__va(0x413)) * 1024)
-
void __init setup_memory_region(void)
{
-#define E820_DEBUG 0
+#define E820_DEBUG 1
#ifdef E820_DEBUG
int i;
#endif
memcpy(e820.map, E820_MAP, e820.nr_map * sizeof e820.map[0]);
#ifdef E820_DEBUG
for (i=0; i < e820.nr_map; i++) {
- printk("e820: %ld @ %08lx ",
- (unsigned long)(e820.map[i].size),
- (unsigned long)(e820.map[i].addr));
+ printk("e820: %08x @ %08x ", (int)e820.map[i].size,
+ (int)e820.map[i].addr);
switch (e820.map[i].type) {
case E820_RAM: printk("(usable)\n");
break;
} /* setup_memory_region */
-void __init setup_arch(char **cmdline_p, unsigned long * memory_start_p, unsigned long * memory_end_p)
+static inline void parse_mem_cmdline (char ** cmdline_p)
{
- unsigned long high_pfn, max_pfn;
char c = ' ', *to = command_line, *from = COMMAND_LINE;
int len = 0;
- int i;
- int usermem=0;
-
-#ifdef CONFIG_VISWS
- visws_get_board_type_and_rev();
-#endif
-
- ROOT_DEV = to_kdev_t(ORIG_ROOT_DEV);
- drive_info = DRIVE_INFO;
- screen_info = SCREEN_INFO;
- apm_bios_info = APM_BIOS_INFO;
- if( SYS_DESC_TABLE.length != 0 ) {
- MCA_bus = SYS_DESC_TABLE.table[3] &0x2;
- machine_id = SYS_DESC_TABLE.table[0];
- machine_submodel_id = SYS_DESC_TABLE.table[1];
- BIOS_revision = SYS_DESC_TABLE.table[2];
- }
- aux_device_present = AUX_DEVICE_INFO;
-
-#ifdef CONFIG_BLK_DEV_RAM
- rd_image_start = RAMDISK_FLAGS & RAMDISK_IMAGE_START_MASK;
- rd_prompt = ((RAMDISK_FLAGS & RAMDISK_PROMPT_FLAG) != 0);
- rd_doload = ((RAMDISK_FLAGS & RAMDISK_LOAD_FLAG) != 0);
-#endif
- setup_memory_region();
-
- if (!MOUNT_ROOT_RDONLY)
- root_mountflags &= ~MS_RDONLY;
- init_mm.start_code = (unsigned long) &_text;
- init_mm.end_code = (unsigned long) &_etext;
- init_mm.end_data = (unsigned long) &_edata;
- init_mm.brk = (unsigned long) &_end;
-
- code_resource.start = virt_to_bus(&_text);
- code_resource.end = virt_to_bus(&_etext)-1;
- data_resource.start = virt_to_bus(&_etext);
- data_resource.end = virt_to_bus(&_edata)-1;
+ int usermem = 0;
/* Save unparsed command line copy for /proc/cmdline */
memcpy(saved_command_line, COMMAND_LINE, COMMAND_LINE_SIZE);
* "mem=XXX[KkmM]@XXX[KkmM]" defines a memory region from
* <start> to <start>+<mem>, overriding the bios size.
*/
- if (c == ' ' && *(const unsigned long *)from == *(const unsigned long *)"mem=") {
- if (to != command_line) to--;
+ if (c == ' ' && !memcmp(from, "mem=", 4)) {
+ if (to != command_line)
+ to--;
if (!memcmp(from+4, "nopentium", 9)) {
from += 9+4;
boot_cpu_data.x86_capability &= ~X86_FEATURE_PSE;
}
mem_size = memparse(from+4, &from);
if (*from == '@')
- start_at = memparse(from+1,&from);
+ start_at = memparse(from+1, &from);
else {
start_at = HIGH_MEMORY;
mem_size -= HIGH_MEMORY;
}
*to = '\0';
*cmdline_p = command_line;
+}
- /* Find the highest page frame number we have available */
- max_pfn = 0;
- for (i=0; i < e820.nr_map; i++) {
- /* RAM? */
- if (e820.map[i].type == E820_RAM) {
- unsigned long end_pfn = (e820.map[i].addr + e820.map[i].size) >> PAGE_SHIFT;
+void __init setup_arch(char **cmdline_p)
+{
+ unsigned long bootmap_size;
+ unsigned long start_pfn, max_pfn, max_low_pfn;
+ int i;
- if (end_pfn > max_pfn)
- max_pfn = end_pfn;
- }
+#ifdef CONFIG_VISWS
+ visws_get_board_type_and_rev();
+#endif
+
+ ROOT_DEV = to_kdev_t(ORIG_ROOT_DEV);
+ drive_info = DRIVE_INFO;
+ screen_info = SCREEN_INFO;
+ apm_bios_info = APM_BIOS_INFO;
+ if( SYS_DESC_TABLE.length != 0 ) {
+ MCA_bus = SYS_DESC_TABLE.table[3] &0x2;
+ machine_id = SYS_DESC_TABLE.table[0];
+ machine_submodel_id = SYS_DESC_TABLE.table[1];
+ BIOS_revision = SYS_DESC_TABLE.table[2];
}
+ aux_device_present = AUX_DEVICE_INFO;
-/*
- * We can only allocate a limited amount of direct-mapped memory
- */
-#define VMALLOC_RESERVE (128 << 20) /* 128MB for vmalloc and initrd */
-#define MAXMEM ((unsigned long)(-PAGE_OFFSET-VMALLOC_RESERVE))
-#define MAXMEM_PFN (MAXMEM >> PAGE_SHIFT)
+#ifdef CONFIG_BLK_DEV_RAM
+ rd_image_start = RAMDISK_FLAGS & RAMDISK_IMAGE_START_MASK;
+ rd_prompt = ((RAMDISK_FLAGS & RAMDISK_PROMPT_FLAG) != 0);
+ rd_doload = ((RAMDISK_FLAGS & RAMDISK_LOAD_FLAG) != 0);
+#endif
+ setup_memory_region();
- high_pfn = MAXMEM_PFN;
- if (max_pfn < high_pfn)
- high_pfn = max_pfn;
+ if (!MOUNT_ROOT_RDONLY)
+ root_mountflags &= ~MS_RDONLY;
+ init_mm.start_code = (unsigned long) &_text;
+ init_mm.end_code = (unsigned long) &_etext;
+ init_mm.end_data = (unsigned long) &_edata;
+ init_mm.brk = (unsigned long) &_end;
+
+ code_resource.start = virt_to_bus(&_text);
+ code_resource.end = virt_to_bus(&_etext)-1;
+ data_resource.start = virt_to_bus(&_etext);
+ data_resource.end = virt_to_bus(&_edata)-1;
+
+ parse_mem_cmdline(cmdline_p);
+
+#define PFN_UP(x) (((x) + PAGE_SIZE-1) >> PAGE_SHIFT)
+#define PFN_DOWN(x) ((x) >> PAGE_SHIFT)
+#define PFN_PHYS(x) ((x) << PAGE_SHIFT)
/*
- * But the bigmem stuff may be able to use more of it
- * (but currently only up to about 4GB)
+ * 128MB for vmalloc and initrd
*/
-#ifdef CONFIG_BIGMEM
- #define MAXBIGMEM ((unsigned long)(~(VMALLOC_RESERVE-1)))
- #define MAXBIGMEM_PFN (MAXBIGMEM >> PAGE_SHIFT)
- if (max_pfn > MAX_PFN)
- max_pfn = MAX_PFN;
-
-/* When debugging, make half of "normal" memory be BIGMEM memory instead */
-#ifdef BIGMEM_DEBUG
- high_pfn >>= 1;
-#endif
+#define VMALLOC_RESERVE (unsigned long)(128 << 20)
+#define MAXMEM (unsigned long)(-PAGE_OFFSET-VMALLOC_RESERVE)
+#define MAXMEM_PFN PFN_DOWN(MAXMEM)
+
+ /*
+ * partially used pages are not usable - thus
+ * we are rounding upwards:
+ */
+ start_pfn = PFN_UP(__pa(&_end));
+
+ /*
+ * Find the highest page frame number we have available
+ */
+ max_pfn = 0;
+ for (i = 0; i < e820.nr_map; i++) {
+ unsigned long curr_pfn;
+ /* RAM? */
+ if (e820.map[i].type != E820_RAM)
+ continue;
+ curr_pfn = PFN_DOWN(e820.map[i].addr + e820.map[i].size);
+ if (curr_pfn > max_pfn)
+ max_pfn = curr_pfn;
+ }
- bigmem_start = high_pfn << PAGE_SHIFT;
- bigmem_end = max_pfn << PAGE_SHIFT;
- printk(KERN_NOTICE "%ldMB BIGMEM available.\n", (bigmem_end-bigmem_start) >> 20);
+ /*
+ * Determine low and high memory ranges:
+ */
+ max_low_pfn = max_pfn;
+ if (max_low_pfn > MAXMEM_PFN)
+ max_low_pfn = MAXMEM_PFN;
+
+#ifdef CONFIG_HIGHMEM
+ highstart_pfn = highend_pfn = max_pfn;
+ if (max_pfn > MAXMEM_PFN) {
+ highstart_pfn = MAXMEM_PFN;
+ highend_pfn = max_pfn;
+ printk(KERN_NOTICE "%ldMB HIGHMEM available.\n",
+ pages_to_mb(highend_pfn - highstart_pfn));
+ }
#endif
+ /*
+ * Initialize the boot-time allocator (with low memory only):
+ */
+ bootmap_size = init_bootmem(start_pfn, max_low_pfn);
- ram_resources[1].end = (high_pfn << PAGE_SHIFT)-1;
+ /*
+ * FIXME: what about high memory?
+ */
+ ram_resources[1].end = PFN_PHYS(max_low_pfn);
- *memory_start_p = (unsigned long) &_end;
- *memory_end_p = PAGE_OFFSET + (high_pfn << PAGE_SHIFT);
+ /*
+ * Register fully available low RAM pages with the bootmem allocator.
+ */
+ for (i = 0; i < e820.nr_map; i++) {
+ unsigned long curr_pfn, last_pfn, size;
+ /*
+ * Reserve usable low memory
+ */
+ if (e820.map[i].type != E820_RAM)
+ continue;
+ /*
+ * We are rounding up the start address of usable memory:
+ */
+ curr_pfn = PFN_UP(e820.map[i].addr);
+ if (curr_pfn >= max_low_pfn)
+ continue;
+ /*
+ * ... and at the end of the usable range downwards:
+ */
+ last_pfn = PFN_DOWN(e820.map[i].addr + e820.map[i].size);
+
+ if (last_pfn > max_low_pfn)
+ last_pfn = max_low_pfn;
+ size = last_pfn - curr_pfn;
+ free_bootmem(PFN_PHYS(curr_pfn), PFN_PHYS(size));
+ }
+ /*
+ * Reserve the bootmem bitmap itself as well. We do this in two
+ * steps (first step was init_bootmem()) because this catches
+ * the (very unlikely) case of us accidentally initializing the
+ * bootmem allocator with an invalid RAM area.
+ */
+ reserve_bootmem(HIGH_MEMORY, (PFN_PHYS(start_pfn) +
+ bootmap_size + PAGE_SIZE-1) - (HIGH_MEMORY));
+
+ /*
+ * reserve physical page 0 - it's a special BIOS page on many boxes,
+ * enabling clean reboots, SMP operation, laptop functions.
+ */
+ reserve_bootmem(0, PAGE_SIZE);
+
+#ifdef __SMP__
+ /*
+ * But first pinch a few for the stack/trampoline stuff
+ * FIXME: Don't need the extra page at 4K, but need to fix
+ * trampoline before removing it. (see the GDT stuff)
+ */
+ reserve_bootmem(PAGE_SIZE, PAGE_SIZE);
+ smp_alloc_memory(); /* AP processor realmode stacks in low memory*/
+#endif
#ifdef __SMP__
/*
#endif
#ifdef CONFIG_BLK_DEV_INITRD
+/* FIXME: needs to do the new bootmem alloc stuff */
if (LOADER_TYPE) {
initrd_start = INITRD_START ? INITRD_START + PAGE_OFFSET : 0;
initrd_end = initrd_start+INITRD_SIZE;
- if (initrd_end > memory_end) {
+ if (initrd_end > (max_low_pfn << PAGE_SHIFT)) {
printk("initrd extends beyond end of memory "
"(0x%08lx > 0x%08lx)\ndisabling initrd\n",
- initrd_end,memory_end);
+ initrd_end, max_low_pfn << PAGE_SHIFT);
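The setup_arch() changes above all revolve around the new boot-time
allocator: every page starts out reserved, the usable e820 ranges are
freed, and special pages are re-reserved before the first allocation.
A toy model of that reserve-then-free discipline (illustration only -
the page numbers are invented and the real allocator lives in
mm/bootmem.c):

#include <stdio.h>
#include <string.h>

#define NR_PAGES 64
static unsigned char busy[NR_PAGES];	/* one flag per page frame */

static void toy_init_bootmem(void)	{ memset(busy, 1, sizeof(busy)); }
static void toy_free(int pfn, int n)	{ while (n--) busy[pfn++] = 0; }
static void toy_reserve(int pfn, int n)	{ while (n--) busy[pfn++] = 1; }

static int toy_alloc_page(void)
{
	int pfn;

	for (pfn = 0; pfn < NR_PAGES; pfn++)
		if (!busy[pfn]) {
			busy[pfn] = 1;
			return pfn;
		}
	return -1;			/* out of boot memory */
}

int main(void)
{
	toy_init_bootmem();	/* init_bootmem(): everything reserved */
	toy_free(1, 40);	/* free_bootmem() for a usable RAM range */
	toy_reserve(0, 1);	/* reserve_bootmem(0, PAGE_SIZE): BIOS page */
	toy_reserve(4, 8);	/* kernel image + bootmem bitmap */
	printf("first free pfn: %d\n", toy_alloc_page());
	return 0;
}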
#include <linux/kernel_stat.h>
#include <linux/smp_lock.h>
#include <linux/irq.h>
+#include <linux/bootmem.h>
#include <linux/delay.h>
#include <linux/mc146818rtc.h>
* We are called very early to get the low memory for the
* SMP bootup trampoline page.
*/
-unsigned long __init smp_alloc_memory(unsigned long mem_base)
+void __init smp_alloc_memory(void)
{
- if (virt_to_phys((void *)mem_base) >= 0x9F000)
+ trampoline_base = (void *) alloc_bootmem_pages(PAGE_SIZE);
+ /*
+ * Has to be in very low memory so we can execute
+ * real-mode AP code.
+ */
+ if (__pa(trampoline_base) >= 0x9F000)
BUG();
- trampoline_base = (void *)mem_base;
- return mem_base + PAGE_SIZE;
}
/*
apic_write(APIC_DFR, value);
}
-unsigned long __init init_smp_mappings(unsigned long memory_start)
+void __init init_smp_mappings(void)
{
unsigned long apic_phys;
- memory_start = PAGE_ALIGN(memory_start);
if (smp_found_config) {
apic_phys = mp_lapic_addr;
} else {
* could use the real zero-page, but it's safer
* this way if some buggy code writes to this page ...
*/
- apic_phys = __pa(memory_start);
- memset((void *)memory_start, 0, PAGE_SIZE);
- memory_start += PAGE_SIZE;
+ apic_phys = __pa(alloc_bootmem_pages(PAGE_SIZE));
+ memset((void *)apic_phys, 0, PAGE_SIZE);
}
- set_fixmap(FIX_APIC_BASE,apic_phys);
+ set_fixmap(FIX_APIC_BASE, apic_phys);
dprintk("mapped APIC to %08lx (%08lx)\n", APIC_BASE, apic_phys);
#ifdef CONFIG_X86_IO_APIC
if (smp_found_config) {
ioapic_phys = mp_ioapics[i].mpc_apicaddr;
} else {
- ioapic_phys = __pa(memory_start);
- memset((void *)memory_start, 0, PAGE_SIZE);
- memory_start += PAGE_SIZE;
+ ioapic_phys = __pa(alloc_bootmem_pages(PAGE_SIZE));
+ memset((void *)ioapic_phys, 0, PAGE_SIZE);
}
set_fixmap(idx,ioapic_phys);
dprintk("mapped IOAPIC to %08lx (%08lx)\n",
}
}
#endif
-
- return memory_start;
}
/*
smp_callin();
while (!atomic_read(&smp_commenced))
/* nothing */ ;
+ /*
+ * low-memory mappings have been cleared, flush them from
+ * the local TLBs too.
+ */
+ local_flush_tlb();
+
return cpu_idle();
}
static void __init do_boot_cpu(int i)
{
unsigned long cfg;
- pgd_t maincfg;
struct task_struct *idle;
unsigned long send_status, accept_status;
int timeout, num_starts, j;
*((volatile unsigned short *) phys_to_virt(0x467)) = start_eip & 0xf;
dprintk("3.\n");
- maincfg=swapper_pg_dir[0];
- ((unsigned long *)swapper_pg_dir)[0]=0x102007;
-
/*
* Be paranoid about clearing APIC errors.
*/
cpucount--;
}
- swapper_pg_dir[0]=maincfg;
- local_flush_tlb();
-
/* mark "stuck" area as not stuck */
*((volatile unsigned long *)phys_to_virt(8192)) = 0;
}
#ifndef CONFIG_VISWS
{
- unsigned long cfg;
-
/*
* Install writable page 0 entry to set BIOS data area.
*/
- cfg = pg0[0];
- /* writeable, present, addr 0 */
- pg0[0] = _PAGE_RW | _PAGE_PRESENT | 0;
local_flush_tlb();
/*
CMOS_WRITE(0, 0xf);
*((volatile long *) phys_to_virt(0x467)) = 0;
-
- /*
- * Restore old page 0 entry.
- */
- pg0[0] = cfg;
- local_flush_tlb();
}
#endif
*/
if (cpu_has_tsc && cpucount)
synchronize_tsc_bp();
+
+ zap_low_mappings();
}
#endif /* CONFIG_MATH_EMULATION */
+#ifndef CONFIG_M686
void __init trap_init_f00f_bug(void)
{
unsigned long page;
pgd = pgd_offset(&init_mm, page);
pmd = pmd_offset(pgd, page);
pte = pte_offset(pmd, page);
- free_page(pte_page(*pte));
- *pte = mk_pte(&idt_table, PAGE_KERNEL_RO);
+ __free_page(pte_page(*pte));
+ *pte = mk_pte_phys(__pa(&idt_table), PAGE_KERNEL_RO);
local_flush_tlb();
/*
idt = (struct desc_struct *)page;
__asm__ __volatile__("lidt %0": "=m" (idt_descr));
}
+#endif
#define _set_gate(gate_addr,type,dpl,addr) \
do { \
#endif
void __init trap_init(void)
{
- if (readl(0x0FFFD9) == 'E' + ('I'<<8) + ('S'<<16) + ('A'<<24))
+ if (isa_readl(0x0FFFD9) == 'E'+('I'<<8)+('S'<<16)+('A'<<24))
EISA_bus = 1;
set_trap_gate(0,÷_error);
if (pgd_none(*pgd))
return;
if (pgd_bad(*pgd)) {
- printk("vm86: bad pgd entry [%p]:%08lx\n", pgd, pgd_val(*pgd));
+ pgd_ERROR(*pgd);
pgd_clear(pgd);
return;
}
if (pmd_none(*pmd))
return;
if (pmd_bad(*pmd)) {
- printk("vm86: bad pmd entry [%p]:%08lx\n", pmd, pmd_val(*pmd));
+ pmd_ERROR(*pmd);
pmd_clear(pmd);
return;
}
O_TARGET := mm.o
O_OBJS := init.o fault.o ioremap.o extable.o
-ifeq ($(CONFIG_BIGMEM),y)
-O_OBJS += bigmem.o
-endif
-
include $(TOPDIR)/Rules.make
+++ /dev/null
-/*
- * BIGMEM IA32 code and variables.
- *
- * (C) 1999 Andrea Arcangeli, SuSE GmbH, andrea@suse.de
- * Gerhard Wichert, Siemens AG, Gerhard.Wichert@pdb.siemens.de
- */
-
-#include <linux/mm.h>
-#include <linux/bigmem.h>
-
-unsigned long bigmem_start, bigmem_end;
-
-/* NOTE: fixmap_init alloc all the fixmap pagetables contigous on the
- physical space so we can cache the place of the first one and move
- around without checking the pgd every time. */
-pte_t *kmap_pte;
-pgprot_t kmap_prot;
-
-#define kmap_get_fixmap_pte(vaddr) \
- pte_offset(pmd_offset(pgd_offset_k(vaddr), (vaddr)), (vaddr))
-
-void __init kmap_init(void)
-{
- unsigned long kmap_vstart;
-
- /* cache the first kmap pte */
- kmap_vstart = __fix_to_virt(FIX_KMAP_BEGIN);
- kmap_pte = kmap_get_fixmap_pte(kmap_vstart);
-
- kmap_prot = PAGE_KERNEL;
- if (boot_cpu_data.x86_capability & X86_FEATURE_PGE)
- pgprot_val(kmap_prot) |= _PAGE_GLOBAL;
-}
return 0;
}
+static inline void handle_wp_test (void)
+{
+ const unsigned long vaddr = PAGE_OFFSET;
+ pgd_t *pgd;
+ pmd_t *pmd;
+ pte_t *pte;
+
+ /*
+ * make it read/writable temporarily, so that the fault
+ * can be handled.
+ */
+ pgd = swapper_pg_dir + __pgd_offset(vaddr);
+ pmd = pmd_offset(pgd, vaddr);
+ pte = pte_offset(pmd, vaddr);
+ *pte = mk_pte_phys(0, PAGE_KERNEL);
+ local_flush_tlb();
+
+ boot_cpu_data.wp_works_ok = 1;
+ /*
+ * Beware: Black magic here. The printk is needed here to flush
+ * CPU state on certain buggy processors.
+ */
+ printk("Ok");
+}
+
asmlinkage void do_invalid_op(struct pt_regs *, unsigned long);
extern unsigned long idt;
* First we check if it was the bootup rw-test, though..
*/
if (boot_cpu_data.wp_works_ok < 0 &&
- address == PAGE_OFFSET && (error_code & 1)) {
- boot_cpu_data.wp_works_ok = 1;
- pg0[0] = pte_val(mk_pte(PAGE_OFFSET, PAGE_KERNEL));
- local_flush_tlb();
- /*
- * Beware: Black magic here. The printk is needed here to flush
- * CPU state on certain buggy processors.
- */
- printk("Ok");
+ address == PAGE_OFFSET && (error_code & 1)) {
+ handle_wp_test();
return;
}
#ifdef CONFIG_BLK_DEV_INITRD
#include <linux/blk.h>
#endif
-#include <linux/bigmem.h>
+#include <linux/highmem.h>
+#include <linux/pagemap.h>
+#include <linux/bootmem.h>
#include <asm/processor.h>
#include <asm/system.h>
#include <asm/fixmap.h>
#include <asm/e820.h>
-static unsigned long totalram = 0;
-static unsigned long totalbig = 0;
+unsigned long highstart_pfn, highend_pfn;
+static unsigned long totalram_pages = 0;
+static unsigned long totalhigh_pages = 0;
extern void show_net_buffers(void);
-extern unsigned long init_smp_mappings(unsigned long);
-void __bad_pte_kernel(pmd_t *pmd)
+/*
+ * BAD_PAGE is the page that is used for page faults when linux
+ * is out-of-memory. Older versions of linux just did a
+ * do_exit(), but using this instead means there is less risk
+ * for a process dying in kernel mode, possibly leaving an inode
+ * unused etc..
+ *
+ * BAD_PAGETABLE is the accompanying page-table: it is initialized
+ * to point to BAD_PAGE entries.
+ *
+ * ZERO_PAGE is a special page that is used for zero-initialized
+ * data and COW.
+ */
+
+/*
+ * These are allocated in head.S so that we get proper page alignment.
+ * If you change the size of these then change head.S as well.
+ */
+extern char empty_bad_page[PAGE_SIZE];
+#if CONFIG_X86_PAE
+extern pmd_t empty_bad_pmd_table[PTRS_PER_PMD];
+#endif
+extern pte_t empty_bad_pte_table[PTRS_PER_PTE];
+
+/*
+ * We init them before every return and make them writable-shared.
+ * This guarantees we get out of the kernel in some more or less sane
+ * way.
+ */
+#if CONFIG_X86_PAE
+static pmd_t * get_bad_pmd_table(void)
{
- printk("Bad pmd in pte_alloc: %08lx\n", pmd_val(*pmd));
- pmd_val(*pmd) = _KERNPG_TABLE + __pa(BAD_PAGETABLE);
+ pmd_t v;
+ int i;
+
+ pmd_val(v) = _PAGE_TABLE + __pa(empty_bad_pte_table);
+
+ for (i = 0; i < PAGE_SIZE/sizeof(pmd_t); i++)
+ empty_bad_pmd_table[i] = v;
+
+ return empty_bad_pmd_table;
}
+#endif
-void __bad_pte(pmd_t *pmd)
+static pte_t * get_bad_pte_table(void)
{
- printk("Bad pmd in pte_alloc: %08lx\n", pmd_val(*pmd));
- pmd_val(*pmd) = _PAGE_TABLE + __pa(BAD_PAGETABLE);
+ pte_t v;
+ int i;
+
+ v = pte_mkdirty(mk_pte_phys(__pa(empty_bad_page), PAGE_SHARED));
+
+ for (i = 0; i < PAGE_SIZE/sizeof(pte_t); i++)
+ empty_bad_pte_table[i] = v;
+
+ return empty_bad_pte_table;
+}
+
+
+
+void __handle_bad_pmd(pmd_t *pmd)
+{
+ pmd_ERROR(*pmd);
+ pmd_val(*pmd) = _PAGE_TABLE + __pa(get_bad_pte_table());
+}
+
+void __handle_bad_pmd_kernel(pmd_t *pmd)
+{
+ pmd_ERROR(*pmd);
+ pmd_val(*pmd) = _KERNPG_TABLE + __pa(get_bad_pte_table());
}
pte_t *get_pte_kernel_slow(pmd_t *pmd, unsigned long offset)
pte = (pte_t *) __get_free_page(GFP_KERNEL);
if (pmd_none(*pmd)) {
if (pte) {
- clear_page((unsigned long)pte);
+ clear_page(pte);
pmd_val(*pmd) = _KERNPG_TABLE + __pa(pte);
return pte + offset;
}
- pmd_val(*pmd) = _KERNPG_TABLE + __pa(BAD_PAGETABLE);
+ pmd_val(*pmd) = _KERNPG_TABLE + __pa(get_bad_pte_table());
return NULL;
}
free_page((unsigned long)pte);
if (pmd_bad(*pmd)) {
- __bad_pte_kernel(pmd);
+ __handle_bad_pmd_kernel(pmd);
return NULL;
}
return (pte_t *) pmd_page(*pmd) + offset;
pte = (unsigned long) __get_free_page(GFP_KERNEL);
if (pmd_none(*pmd)) {
if (pte) {
- clear_page(pte);
+ clear_page((void *)pte);
pmd_val(*pmd) = _PAGE_TABLE + __pa(pte);
- return (pte_t *)(pte + offset);
+ return (pte_t *)pte + offset;
}
- pmd_val(*pmd) = _PAGE_TABLE + __pa(BAD_PAGETABLE);
+ pmd_val(*pmd) = _PAGE_TABLE + __pa(get_bad_pte_table());
return NULL;
}
free_page(pte);
if (pmd_bad(*pmd)) {
- __bad_pte(pmd);
+ __handle_bad_pmd(pmd);
return NULL;
}
- return (pte_t *) (pmd_page(*pmd) + offset);
+ return (pte_t *) pmd_page(*pmd) + offset;
}
int do_check_pgt_cache(int low, int high)
return freed;
}
-/*
- * BAD_PAGE is the page that is used for page faults when linux
- * is out-of-memory. Older versions of linux just did a
- * do_exit(), but using this instead means there is less risk
- * for a process dying in kernel mode, possibly leaving an inode
- * unused etc..
- *
- * BAD_PAGETABLE is the accompanying page-table: it is initialized
- * to point to BAD_PAGE entries.
- *
- * ZERO_PAGE is a special page that is used for zero-initialized
- * data and COW.
- */
-pte_t * __bad_pagetable(void)
-{
- extern char empty_bad_page_table[PAGE_SIZE];
- int d0, d1;
-
- __asm__ __volatile__("cld ; rep ; stosl"
- : "=&D" (d0), "=&c" (d1)
- : "a" (pte_val(BAD_PAGE)),
- "0" ((long) empty_bad_page_table),
- "1" (PAGE_SIZE/4)
- : "memory");
- return (pte_t *) empty_bad_page_table;
-}
+/* NOTE: fixmap_init allocates all the fixmap pagetables contiguously
+   in physical space so we can cache the place of the first one and
+   move around without checking the pgd every time. */
+
+#if CONFIG_HIGHMEM
+pte_t *kmap_pte;
+pgprot_t kmap_prot;
-pte_t __bad_page(void)
+#define kmap_get_fixmap_pte(vaddr) \
+ pte_offset(pmd_offset(pgd_offset_k(vaddr), (vaddr)), (vaddr))
+
+void __init kmap_init(void)
{
- extern char empty_bad_page[PAGE_SIZE];
- int d0, d1;
-
- __asm__ __volatile__("cld ; rep ; stosl"
- : "=&D" (d0), "=&c" (d1)
- : "a" (0),
- "0" ((long) empty_bad_page),
- "1" (PAGE_SIZE/4)
- : "memory");
- return pte_mkdirty(mk_pte((unsigned long) empty_bad_page, PAGE_SHARED));
+ unsigned long kmap_vstart;
+
+ /* cache the first kmap pte */
+ kmap_vstart = __fix_to_virt(FIX_KMAP_BEGIN);
+ kmap_pte = kmap_get_fixmap_pte(kmap_vstart);
+
+ kmap_prot = PAGE_KERNEL;
+ if (boot_cpu_data.x86_capability & X86_FEATURE_PGE)
+ pgprot_val(kmap_prot) |= _PAGE_GLOBAL;
}
+#endif
void show_mem(void)
{
- int i,free = 0,total = 0,reserved = 0;
+ int i,free = 0, total = 0, reserved = 0;
int shared = 0, cached = 0;
- int bigmem = 0;
+ int highmem = 0;
printk("Mem-info:\n");
show_free_areas();
i = max_mapnr;
while (i-- > 0) {
total++;
- if (PageBIGMEM(mem_map+i))
- bigmem++;
+ if (PageHighMem(mem_map+i))
+ highmem++;
if (PageReserved(mem_map+i))
reserved++;
else if (PageSwapCache(mem_map+i))
else
shared += page_count(mem_map+i) - 1;
}
- printk("%d pages of RAM\n",total);
- printk("%d pages of BIGMEM\n",bigmem);
+ printk("%d pages of RAM\n", total);
+ printk("%d pages of HIGHMEM\n",highmem);
printk("%d reserved pages\n",reserved);
printk("%d pages shared\n",shared);
printk("%d pages swap cached\n",cached);
#endif
}
-extern unsigned long free_area_init(unsigned long, unsigned long);
-
/* References to section boundaries */
extern char _text, _etext, _edata, __bss_start, _end;
extern char __init_begin, __init_end;
-/*
- * allocate page table(s) for compile-time fixed mappings
- */
-static unsigned long __init fixmap_init(unsigned long start_mem)
-{
- pgd_t * pg_dir;
- unsigned int idx;
- unsigned long address;
-
- start_mem = PAGE_ALIGN(start_mem);
-
- for (idx=1; idx <= __end_of_fixed_addresses; idx += PTRS_PER_PTE)
- {
- address = __fix_to_virt(__end_of_fixed_addresses-idx);
- pg_dir = swapper_pg_dir + (address >> PGDIR_SHIFT);
- memset((void *)start_mem, 0, PAGE_SIZE);
- pgd_val(*pg_dir) = _PAGE_TABLE | __pa(start_mem);
- start_mem += PAGE_SIZE;
- }
-
- return start_mem;
-}
-
static void set_pte_phys (unsigned long vaddr, unsigned long phys)
{
pgprot_t prot;
- pte_t * pte;
+ pgd_t *pgd;
+ pmd_t *pmd;
+ pte_t *pte;
- pte = pte_offset(pmd_offset(pgd_offset_k(vaddr), vaddr), vaddr);
+ pgd = swapper_pg_dir + __pgd_offset(vaddr);
+ pmd = pmd_offset(pgd, vaddr);
+ pte = pte_offset(pmd, vaddr);
prot = PAGE_KERNEL;
if (boot_cpu_data.x86_capability & X86_FEATURE_PGE)
pgprot_val(prot) |= _PAGE_GLOBAL;
set_pte(pte, mk_pte_phys(phys, prot));
- local_flush_tlb();
+ /*
+ * It's enough to flush this one mapping.
+ */
+ __flush_tlb_one(vaddr);
}
void set_fixmap (enum fixed_addresses idx, unsigned long phys)
set_pte_phys (address,phys);
}
+static void __init pagetable_init(void)
+{
+ pgd_t *pgd, *pgd_base;
+ pmd_t *pmd;
+ pte_t *pte;
+ int i, j, k;
+ unsigned long vaddr;
+ unsigned long end = (unsigned long)__va(max_low_pfn*PAGE_SIZE);
+
+ pgd_base = swapper_pg_dir;
+
+ vaddr = PAGE_OFFSET;
+ i = __pgd_offset(vaddr);
+ pgd = pgd_base + i;
+
+ for (; (i < PTRS_PER_PGD) && (vaddr <= end); pgd++, i++) {
+ vaddr = i*PGDIR_SIZE;
+#if CONFIG_X86_PAE
+ pmd = (pmd_t *) alloc_bootmem_pages(PAGE_SIZE);
+ memset((void*)pmd, 0, PAGE_SIZE);
+ pgd_val(*pgd) = __pa(pmd) + 0x1;
+#else
+ pmd = (pmd_t *)pgd;
+#endif
+ if (pmd != pmd_offset(pgd, 0))
+ BUG();
+ for (j = 0; (j < PTRS_PER_PMD) && (vaddr <= end); pmd++, j++) {
+ vaddr = i*PGDIR_SIZE + j*PMD_SIZE;
+ if (cpu_has_pse) {
+ unsigned long __pe;
+
+ set_in_cr4(X86_CR4_PSE);
+ boot_cpu_data.wp_works_ok = 1;
+ __pe = _KERNPG_TABLE + _PAGE_PSE + __pa(vaddr);
+ /* Make it "global" too if supported */
+ if (cpu_has_pge) {
+ set_in_cr4(X86_CR4_PGE);
+ __pe += _PAGE_GLOBAL;
+ }
+ pmd_val(*pmd) = __pe;
+ continue;
+ }
+
+ pte = (pte_t *) alloc_bootmem_pages(PAGE_SIZE);
+ memset((void*)pte, 0, PAGE_SIZE);
+ pmd_val(*pmd) = _KERNPG_TABLE + __pa(pte);
+
+ if (pte != pte_offset(pmd, 0))
+ BUG();
+
+ for (k = 0;
+ (k < PTRS_PER_PTE) && (vaddr <= end);
+ pte++, k++) {
+ vaddr = i*PGDIR_SIZE + j*PMD_SIZE + k*PAGE_SIZE;
+ *pte = mk_pte_phys(__pa(vaddr), PAGE_KERNEL);
+ }
+ }
+ }
+
+ /*
+ * Fixed mappings, only the page table structure has to be
+ * created - mappings will be set by set_fixmap():
+ */
+
+ vaddr = __fix_to_virt(__end_of_fixed_addresses - 1) & PMD_MASK;
+ i = __pgd_offset(vaddr);
+ j = __pmd_offset(vaddr);
+ pgd = pgd_base + i;
+
+ for ( ; (i < PTRS_PER_PGD) && vaddr; pgd++, i++) {
+#if CONFIG_X86_PAE
+ if (pgd_none(*pgd)) {
+ pmd = (pmd_t *) alloc_bootmem_pages(PAGE_SIZE);
+ memset((void*)pmd, 0, PAGE_SIZE);
+ pgd_val(*pgd) = __pa(pmd) + 0x1;
+ if (pmd != pmd_offset(pgd, vaddr))
+ BUG();
+ }
+ pmd = pmd_offset(pgd, vaddr);
+#else
+ pmd = (pmd_t *)pgd;
+#endif
+ for (; (j < PTRS_PER_PMD) && vaddr; pmd++, j++) {
+ if (pmd_none(*pmd)) {
+ pte = (pte_t *) alloc_bootmem_pages(PAGE_SIZE);
+ memset((void*)pte, 0, PAGE_SIZE);
+ pmd_val(*pmd) = _KERNPG_TABLE + __pa(pte);
+ if (pte != pte_offset(pmd, 0))
+ BUG();
+ }
+ vaddr += PMD_SIZE;
+ }
+ j = 0;
+ }
+
+#if CONFIG_X86_PAE
+ /*
+ * Add low memory identity-mappings - SMP needs it when
+ * starting up on an AP from real-mode. In the non-PAE
+ * case we already have these mappings through head.S.
+ * All user-space mappings are explicitly cleared after
+ * SMP startup.
+ */
+ pgd_base[0] = pgd_base[USER_PTRS_PER_PGD];
+#endif
+}
+
+void __init zap_low_mappings (void)
+{
+ int i;
+ /*
+ * Zap initial low-memory mappings:
+ */
+ for (i = 0; i < USER_PTRS_PER_PGD; i++)
+ pgd_clear(swapper_pg_dir + i);
+}
+
/*
* paging_init() sets up the page tables - note that the first 4MB are
* already mapped by head.S.
* This routines also unmaps the page at virtual kernel address 0, so
* that we can trap those pesky NULL-reference errors in the kernel.
*/
-unsigned long __init paging_init(unsigned long start_mem, unsigned long end_mem)
+void __init paging_init(void)
{
- pgd_t * pg_dir;
- pte_t * pg_table;
- unsigned long tmp;
- unsigned long address;
+ pagetable_init();
-/*
- * Physical page 0 is special; it's not touched by Linux since BIOS
- * and SMM (for laptops with [34]86/SL chips) may need it. It is read
- * and write protected to detect null pointer references in the
- * kernel.
- * It may also hold the MP configuration table when we are booting SMP.
- */
- start_mem = PAGE_ALIGN(start_mem);
- address = PAGE_OFFSET;
- pg_dir = swapper_pg_dir;
- /* unmap the original low memory mappings */
- pgd_val(pg_dir[0]) = 0;
-
- /* Map whole memory from PAGE_OFFSET */
- pg_dir += USER_PGD_PTRS;
- while (address < end_mem) {
- /*
- * If we're running on a Pentium CPU, we can use the 4MB
- * page tables.
- *
- * The page tables we create span up to the next 4MB
- * virtual memory boundary, but that's OK as we won't
- * use that memory anyway.
- */
- if (boot_cpu_data.x86_capability & X86_FEATURE_PSE) {
- unsigned long __pe;
-
- set_in_cr4(X86_CR4_PSE);
- boot_cpu_data.wp_works_ok = 1;
- __pe = _KERNPG_TABLE + _PAGE_4M + __pa(address);
- /* Make it "global" too if supported */
- if (boot_cpu_data.x86_capability & X86_FEATURE_PGE) {
- set_in_cr4(X86_CR4_PGE);
- __pe += _PAGE_GLOBAL;
- }
- pgd_val(*pg_dir) = __pe;
- pg_dir++;
- address += 4*1024*1024;
- continue;
- }
+ __asm__( "movl %%ecx,%%cr3\n" ::"c"(__pa(swapper_pg_dir)));
- /*
- * We're on a [34]86, use normal page tables.
- * pg_table is physical at this point
- */
- pg_table = (pte_t *) (PAGE_MASK & pgd_val(*pg_dir));
- if (!pg_table) {
- pg_table = (pte_t *) __pa(start_mem);
- start_mem += PAGE_SIZE;
- }
+#if CONFIG_X86_PAE
+ /*
+ * We will bail out later - printk doesn't work right now so
+ * the user would just see a hanging kernel.
+ */
+ if (cpu_has_pae)
+ set_in_cr4(X86_CR4_PAE);
+#endif
+
+ __flush_tlb();
- pgd_val(*pg_dir) = _PAGE_TABLE | (unsigned long) pg_table;
- pg_dir++;
-
- /* now change pg_table to kernel virtual addresses */
- pg_table = (pte_t *) __va(pg_table);
- for (tmp = 0 ; tmp < PTRS_PER_PTE ; tmp++,pg_table++) {
- pte_t pte = mk_pte(address, PAGE_KERNEL);
- if (address >= end_mem)
- pte_val(pte) = 0;
- set_pte(pg_table, pte);
- address += PAGE_SIZE;
- }
- }
- start_mem = fixmap_init(start_mem);
#ifdef __SMP__
- start_mem = init_smp_mappings(start_mem);
+ init_smp_mappings();
#endif
- local_flush_tlb();
-#ifndef CONFIG_BIGMEM
- return free_area_init(start_mem, end_mem);
-#else
+#ifdef CONFIG_HIGHMEM
kmap_init(); /* run after fixmap_init */
- return free_area_init(start_mem, bigmem_end + PAGE_OFFSET);
#endif
+#ifdef CONFIG_HIGHMEM
+ free_area_init(highend_pfn);
+#else
+ free_area_init(max_low_pfn);
+#endif
+ return;
}
/*
void __init test_wp_bit(void)
{
- unsigned char tmp_reg;
- unsigned long old = pg0[0];
+/*
+ * Ok, all PAE-capable CPUs are definitely handling the WP bit right.
+ */
+//#ifndef CONFIG_X86_PAE
+ const unsigned long vaddr = PAGE_OFFSET;
+ pgd_t *pgd;
+ pmd_t *pmd;
+ pte_t *pte, old_pte;
+ char tmp_reg;
printk("Checking if this processor honours the WP bit even in supervisor mode... ");
- pg0[0] = pte_val(mk_pte(PAGE_OFFSET, PAGE_READONLY));
+
+ pgd = swapper_pg_dir + __pgd_offset(vaddr);
+ pmd = pmd_offset(pgd, vaddr);
+ pte = pte_offset(pmd, vaddr);
+ old_pte = *pte;
+ *pte = mk_pte_phys(0, PAGE_READONLY);
local_flush_tlb();
+
__asm__ __volatile__(
"jmp 1f; 1:\n"
"movb %0,%1\n"
"movb %1,%0\n"
"jmp 1f; 1:\n"
- :"=m" (*(char *) __va(0)),
+ :"=m" (*(char *) vaddr),
"=q" (tmp_reg)
:/* no inputs */
:"memory");
- pg0[0] = old;
+
+ *pte = old_pte;
local_flush_tlb();
+
if (boot_cpu_data.wp_works_ok < 0) {
boot_cpu_data.wp_works_ok = 0;
printk("No.\n");
#endif
} else
printk(".\n");
+//#endif
}
-static void __init mem_init_region(unsigned long pfn, unsigned long count, unsigned long start_mem_pfn)
+static inline int page_is_ram (unsigned long pagenr)
{
- printk("memory region: %luk @ %08lx000\n", count << 2, pfn);
+ int i;
- do {
- if (pfn >= max_mapnr)
- break;
+ for (i = 0; i < e820.nr_map; i++) {
+ unsigned long addr, size;
- /* Avoid the kernel mapping between HIGH_MEMORY and "start_mem".. */
- if (pfn < (HIGH_MEMORY >> PAGE_SHIFT) || pfn >= start_mem_pfn)
- clear_bit(PG_reserved, &mem_map[pfn].flags);
-
- pfn++;
- } while (--count > 0);
+ if (e820.map[i].type != E820_RAM) /* not usable memory */
+ continue;
+ addr = (e820.map[i].addr+PAGE_SIZE-1) >> PAGE_SHIFT;
+ size = e820.map[i].size >> PAGE_SHIFT;
+ if ((pagenr >= addr) && (pagenr < addr+size))
+ return 1;
+ }
+ return 0;
}
-void __init mem_init(unsigned long start_mem, unsigned long end_mem)
+void __init mem_init(void)
{
- unsigned long start_low_mem = PAGE_SIZE;
int codepages = 0;
int reservedpages = 0;
int datapages = 0;
int initpages = 0;
- unsigned long tmp;
- int i, avail;
-
- end_mem &= PAGE_MASK;
-#ifdef CONFIG_BIGMEM
- bigmem_start = PAGE_ALIGN(bigmem_start);
- bigmem_end &= PAGE_MASK;
-#endif
- high_memory = (void *) end_mem;
-#ifndef CONFIG_BIGMEM
- max_mapnr = num_physpages = MAP_NR(end_mem);
+#ifdef CONFIG_HIGHMEM
+ int tmp;
+
+ if (!mem_map)
+ BUG();
+ highmem_start_page = mem_map + highstart_pfn;
+ /* cache the highmem_mapnr */
+ highmem_mapnr = highstart_pfn;
+ max_mapnr = num_physpages = highend_pfn;
#else
- max_mapnr = num_physpages = PHYSMAP_NR(bigmem_end);
- /* cache the bigmem_mapnr */
- bigmem_mapnr = PHYSMAP_NR(bigmem_start);
+ max_mapnr = num_physpages = max_low_pfn;
#endif
+ high_memory = (void *) __va(max_low_pfn * PAGE_SIZE);
/* clear the zero-page */
memset(empty_zero_page, 0, PAGE_SIZE);
- /* mark usable pages in the mem_map[] */
- start_low_mem = PAGE_ALIGN(start_low_mem)+PAGE_OFFSET;
+ /* this will put all low memory onto the freelists */
+ totalram_pages += free_all_bootmem();
-#ifdef __SMP__
- /*
- * But first pinch a few for the stack/trampoline stuff
- * FIXME: Don't need the extra page at 4K, but need to fix
- * trampoline before removing it. (see the GDT stuff)
- *
- */
- start_low_mem += PAGE_SIZE; /* 32bit startup code */
- start_low_mem = smp_alloc_memory(start_low_mem); /* AP processor stacks */
-#endif
- start_mem = PAGE_ALIGN(start_mem);
+#ifdef CONFIG_HIGHMEM
+ for (tmp = highstart_pfn; tmp < highend_pfn; tmp++) {
+ struct page *page = mem_map + tmp;
- /* walk the whitelist, unreserving good memory
- */
- for (avail = i = 0; i < e820.nr_map; i++) {
- unsigned long start_pfn, end_pfn;
-
- if (e820.map[i].type != E820_RAM) /* not usable memory */
- continue;
-
- start_pfn = (e820.map[i].addr + PAGE_SIZE - 1) >> PAGE_SHIFT;
- end_pfn = (e820.map[i].addr + e820.map[i].size) >> PAGE_SHIFT;
-
- /* We have a certain amount of low memory reserved */
- if (start_pfn < MAP_NR(start_low_mem))
- start_pfn = MAP_NR(start_low_mem);
-
- if (end_pfn <= start_pfn)
- continue;
-
- mem_init_region(start_pfn, end_pfn - start_pfn, MAP_NR(start_mem));
- }
-
- for (tmp = PAGE_OFFSET ; tmp < end_mem ; tmp += PAGE_SIZE) {
- if (tmp >= MAX_DMA_ADDRESS)
- clear_bit(PG_DMA, &mem_map[MAP_NR(tmp)].flags);
- if (PageReserved(mem_map+MAP_NR(tmp))) {
- if (tmp >= (unsigned long) &_text && tmp < (unsigned long) &_edata) {
- if (tmp < (unsigned long) &_etext)
- codepages++;
- else
- datapages++;
- } else if (tmp >= (unsigned long) &__init_begin
- && tmp < (unsigned long) &__init_end)
- initpages++;
- else if (tmp >= (unsigned long) &__bss_start
- && tmp < (unsigned long) start_mem)
- datapages++;
- else
- reservedpages++;
+ if (!page_is_ram(tmp)) {
+ SetPageReserved(page);
continue;
}
- set_page_count(mem_map+MAP_NR(tmp), 1);
- totalram += PAGE_SIZE;
-#ifdef CONFIG_BLK_DEV_INITRD
- if (!initrd_start || (tmp < initrd_start || tmp >= initrd_end))
-#endif
- free_page(tmp);
+ ClearPageReserved(page);
+ set_bit(PG_highmem, &page->flags);
+ atomic_set(&page->count, 1);
+ __free_page(page);
+ totalhigh_pages++;
}
-#ifdef CONFIG_BIGMEM
- for (tmp = bigmem_start; tmp < bigmem_end; tmp += PAGE_SIZE) {
- clear_bit(PG_reserved, &mem_map[PHYSMAP_NR(tmp)].flags);
- set_bit(PG_BIGMEM, &mem_map[PHYSMAP_NR(tmp)].flags);
- atomic_set(&mem_map[PHYSMAP_NR(tmp)].count, 1);
- free_page(tmp + PAGE_OFFSET);
- totalbig += PAGE_SIZE;
- }
- totalram += totalbig;
+ totalram_pages += totalhigh_pages;
#endif
- printk("Memory: %luk/%luk available (%dk kernel code, %dk reserved, %dk data, %dk init, %dk bigmem)\n",
+ printk("Memory: %luk/%luk available (%dk kernel code, %dk reserved, %dk data, %dk init, %ldk highmem)\n",
(unsigned long) nr_free_pages << (PAGE_SHIFT-10),
max_mapnr << (PAGE_SHIFT-10),
codepages << (PAGE_SHIFT-10),
reservedpages << (PAGE_SHIFT-10),
datapages << (PAGE_SHIFT-10),
initpages << (PAGE_SHIFT-10),
- (int) (totalbig >> 10)
+ (unsigned long) (totalhigh_pages << (PAGE_SHIFT-10))
);
+#if CONFIG_X86_PAE
+ if (!cpu_has_pae)
+ panic("cannot execute a PAE-enabled kernel on a PAE-incapable CPU!");
+#endif
if (boot_cpu_data.wp_works_ok < 0)
test_wp_bit();
+ /*
+ * Subtle. SMP is doing its boot stuff late (because it has to
+ * fork idle threads) - but it also needs low mappings for the
+ * protected-mode entry to work. We zap these entries only after
+ * the WP-bit has been tested.
+ */
+#ifndef CONFIG_SMP
+ zap_low_mappings();
+#endif
+
}
void free_initmem(void)
addr = (unsigned long)(&__init_begin);
for (; addr < (unsigned long)(&__init_end); addr += PAGE_SIZE) {
- mem_map[MAP_NR(addr)].flags &= ~(1 << PG_reserved);
+ ClearPageReserved(mem_map + MAP_NR(addr));
set_page_count(mem_map+MAP_NR(addr), 1);
free_page(addr);
- totalram += PAGE_SIZE;
+ totalram_pages++;
}
printk ("Freeing unused kernel memory: %dk freed\n", (&__init_end - &__init_begin) >> 10);
}
void si_meminfo(struct sysinfo *val)
{
- val->totalram = totalram;
+ val->totalram = totalram_pages;
val->sharedram = 0;
- val->freeram = nr_free_pages << PAGE_SHIFT;
- val->bufferram = atomic_read(&buffermem);
- val->totalbig = totalbig;
- val->freebig = nr_free_bigpages << PAGE_SHIFT;
+ val->freeram = nr_free_pages;
+ val->bufferram = atomic_read(&buffermem_pages);
+ val->totalhigh = totalhigh_pages;
+ val->freehigh = nr_free_highpages;
+ val->mem_unit = PAGE_SIZE;
return;
}
end = address + size;
if (end > PMD_SIZE)
end = PMD_SIZE;
+ if (address >= end)
+ BUG();
do {
- if (!pte_none(*pte))
+ if (!pte_none(*pte)) {
printk("remap_area_pte: page already exists\n");
+ BUG();
+ }
set_pte(pte, mk_pte_phys(phys_addr, __pgprot(_PAGE_PRESENT | _PAGE_RW |
_PAGE_DIRTY | _PAGE_ACCESSED | flags)));
address += PAGE_SIZE;
phys_addr += PAGE_SIZE;
pte++;
- } while (address < end);
+ } while (address && (address < end));
}
static inline int remap_area_pmd(pmd_t * pmd, unsigned long address, unsigned long size,
if (end > PGDIR_SIZE)
end = PGDIR_SIZE;
phys_addr -= address;
+ if (address >= end)
+ BUG();
do {
pte_t * pte = pte_alloc_kernel(pmd, address);
if (!pte)
remap_area_pte(pte, address, end - address, address + phys_addr, flags);
address = (address + PMD_SIZE) & PMD_MASK;
pmd++;
- } while (address < end);
+ } while (address && (address < end));
return 0;
}
phys_addr -= address;
dir = pgd_offset(&init_mm, address);
flush_cache_all();
- while (address < end) {
- pmd_t *pmd = pmd_alloc_kernel(dir, address);
+ if (address >= end)
+ BUG();
+ do {
+ pmd_t *pmd;
+ pmd = pmd_alloc_kernel(dir, address);
if (!pmd)
return -ENOMEM;
if (remap_area_pmd(pmd, address, end - address,
set_pgdir(address, *dir);
address = (address + PGDIR_SIZE) & PGDIR_MASK;
dir++;
- }
+ } while (address && (address < end));
flush_tlb_all();
return 0;
}
int ide_release_dma (ide_hwif_t *hwif)
{
if (hwif->dmatable) {
- clear_page((unsigned long)hwif->dmatable); /* clear PRD 1st */
+ clear_page((void *)hwif->dmatable); /* clear PRD 1st */
free_page((unsigned long)hwif->dmatable); /* free PRD 2nd */
}
if ((hwif->dma_extra) && (hwif->channel == 0))
*/
void ide_cmd(ide_drive_t *drive, byte cmd, byte nsect, ide_handler_t *handler)
{
+ drive->timeout = WAIT_CMD;
ide_set_handler (drive, handler);
if (IDE_CONTROL_REG)
OUT_BYTE(drive->ctl,IDE_CONTROL_REG); /* clear nIEN */
#ifdef CONFIG_APM
#include <linux/apm_bios.h>
#endif
+#include <linux/bootmem.h>
#include <asm/io.h>
#include <asm/system.h>
struct tty_driver console_driver;
static int console_refcount;
-unsigned long __init con_init(unsigned long kmem_start)
+void __init con_init(void)
{
const char *display_desc = NULL;
unsigned int currcons = 0;
display_desc = conswitchp->con_startup();
if (!display_desc) {
fg_console = 0;
- return kmem_start;
+ return;
}
memset(&console_driver, 0, sizeof(struct tty_driver));
timer_active |= 1<<BLANK_TIMER;
}
- /* Unfortunately, kmalloc is not running yet */
- /* Due to kmalloc roundup allocating statically is more efficient -
- so provide MIN_NR_CONSOLES for people with very little memory */
+ /*
+ * kmalloc is not running yet - we use the bootmem allocator.
+ */
for (currcons = 0; currcons < MIN_NR_CONSOLES; currcons++) {
int j, k ;
- vc_cons[currcons].d = (struct vc_data *) kmem_start;
- kmem_start += sizeof(struct vc_data);
- vt_cons[currcons] = (struct vt_struct *) kmem_start;
- kmem_start += sizeof(struct vt_struct);
+ vc_cons[currcons].d = (struct vc_data *)
+ alloc_bootmem(sizeof(struct vc_data));
+ vt_cons[currcons] = (struct vt_struct *)
+ alloc_bootmem(sizeof(struct vt_struct));
visual_init(currcons, 1);
- screenbuf = (unsigned short *) kmem_start;
- kmem_start += screenbuf_size;
+ screenbuf = (unsigned short *) alloc_bootmem(screenbuf_size);
kmalloced = 0;
vc_init(currcons, video_num_lines, video_num_columns,
currcons || !sw->con_save_screen);
#endif
init_bh(CONSOLE_BH, console_bh);
-
- return kmem_start;
}
#ifndef VT_SINGLE_DRIVER
if (!tty->read_buf) {
tty->read_buf = (unsigned char *)
- get_free_page(in_interrupt() ? GFP_ATOMIC : GFP_KERNEL);
+ get_zeroed_page(in_interrupt() ? GFP_ATOMIC : GFP_KERNEL);
if (!tty->read_buf)
return -ENOMEM;
}
unsigned short ICP;
#endif
- page = get_free_page(GFP_KERNEL);
+ page = get_zeroed_page(GFP_KERNEL);
if (!page)
return -ENOMEM;
#endif
if (!tmp_buf) {
- page = get_free_page(GFP_KERNEL);
+ page = get_zeroed_page(GFP_KERNEL);
if (!page) {
return -ENOMEM;
}
/*
* Register console.
*/
-long __init serial_console_init(long kmem_start, long kmem_end)
+void __init serial_console_init(void)
{
register_console(&sercons);
- return kmem_start;
}
#endif
extern int sx_init (void);
#endif
#ifdef CONFIG_8xx
-extern long console_8xx_init(long, long);
+extern void console_8xx_init(void);
extern int rs_8xx_init(void);
#endif /* CONFIG_8xx */
tp = o_tp = NULL;
ltp = o_ltp = NULL;
- tty = (struct tty_struct*) get_free_page(GFP_KERNEL);
+ tty = (struct tty_struct*) get_zeroed_page(GFP_KERNEL);
if(!tty)
goto fail_no_mem;
initialize_tty_struct(tty);
}
if (driver->type == TTY_DRIVER_TYPE_PTY) {
- o_tty = (struct tty_struct *) get_free_page(GFP_KERNEL);
+ o_tty = (struct tty_struct *) get_zeroed_page(GFP_KERNEL);
if (!o_tty)
goto free_mem_out;
initialize_tty_struct(o_tty);
* Just do some early initializations, and do the complex setup
* later.
*/
-long __init console_init(long kmem_start, long kmem_end)
+void __init console_init(void)
{
/* Setup the default TTY line discipline. */
memset(ldiscs, 0, sizeof(ldiscs));
* inform about problems etc..
*/
#ifdef CONFIG_VT
- kmem_start = con_init(kmem_start);
+ con_init();
#endif
#ifdef CONFIG_SERIAL_CONSOLE
#ifdef CONFIG_8xx
- kmem_start = console_8xx_init(kmem_start, kmem_end);
+ console_8xx_init();
#else
- kmem_start = serial_console_init(kmem_start, kmem_end);
+ serial_console_init();
#endif /* CONFIG_8xx */
#endif
- return kmem_start;
}
static struct tty_driver dev_tty_driver, dev_syscons_driver;
* Ok, now we can initialize the rest of the tty devices and can count
* on memory allocations, interrupts etc..
*/
-int __init tty_init(void)
+void __init tty_init(void)
{
if (sizeof(struct tty_struct) > PAGE_SIZE)
panic("size of tty structure > PAGE_SIZE!");
#ifdef CONFIG_VT
vcs_init();
#endif
- return 0;
}
rxf = sp->rx_ringp[entry] = (struct RxFD *)skb->tail;
skb->dev = dev;
skb_reserve(skb, sizeof(struct RxFD));
- rxf->rx_buf_addr = virt_to_le32bus(skb->tail);
+ rxf->rx_buf_addr = virt_to_bus(skb->tail);
} else {
rxf = sp->rx_ringp[entry];
}
#endif
#include <linux/kernel.h>
+#include <linux/version.h>
#include <linux/sched.h>
#include <linux/string.h>
#include <linux/timer.h>
#ifdef MODULE
char kernel_version[] = UTS_RELEASE;
-#else
-#ifndef __alpha__
-#define ioremap vremap
-#define iounmap vfree
-#endif
#endif
#if defined(MODULE) && LINUX_VERSION_CODE > 0x20115
MODULE_AUTHOR("Donald Becker <becker@cesdis.gsfc.nasa.gov>");
#define NAK_TIMEOUT (HZ) /* stall wait for printer */
#define MAX_RETRY_COUNT ((60*60*HZ)/NAK_TIMEOUT) /* should not take 1 minute a page! */
+#define BIG_BUF_SIZE 8192
+
+/*
+ * USB Printer Requests
+ */
+#define USB_PRINTER_REQ_GET_DEVICE_ID 0
+#define USB_PRINTER_REQ_GET_PORT_STATUS 1
+#define USB_PRINTER_REQ_SOFT_RESET 2
+
#define MAX_PRINTERS 8
struct pp_usb_data {
struct usb_device *pusb_dev;
- __u8 isopen; /* nz if open */
- __u8 noinput; /* nz if no input stream */
+ __u8 isopen; /* True if open */
+ __u8 noinput; /* True if no input stream */
__u8 minor; /* minor number of device */
__u8 status; /* last status from device */
int maxin, maxout; /* max transfer size in and out */
char *obuf; /* transfer buffer (out only) */
wait_queue_head_t wait_q; /* for timeouts */
unsigned int last_error; /* save for checking */
+ int bulk_in_ep; /* Bulk IN endpoint */
+ int bulk_out_ep; /* Bulk OUT endpoint */
+ int bulk_in_index; /* endpoint[bulk_in_index] */
+ int bulk_out_index; /* endpoint[bulk_out_index] */
};
static struct pp_usb_data *minor_data[MAX_PRINTERS];
#define PPDATA(x) ((struct pp_usb_data *)(x))
-unsigned char printer_read_status(struct pp_usb_data *p)
+static unsigned char printer_read_status(struct pp_usb_data *p)
{
__u8 status;
- devrequest dr;
struct usb_device *dev = p->pusb_dev;
- dr.requesttype = USB_TYPE_CLASS | USB_RT_INTERFACE | 0x80;
- dr.request = 1;
- dr.value = 0;
- dr.index = 0;
- dr.length = 1;
- if (dev->bus->op->control_msg(dev, usb_rcvctrlpipe(dev,0), &dr, &status, 1, HZ)) {
- return 0;
+ if (usb_control_msg(dev, usb_rcvctrlpipe(dev,0),
+ USB_PRINTER_REQ_GET_PORT_STATUS,
+ USB_TYPE_CLASS | USB_RT_INTERFACE | USB_DIR_IN,
+ 0, 0, &status, 1, HZ)) {
+ return 0;
}
return status;
}
return status;
}
-void printer_reset(struct pp_usb_data *p)
+static void printer_reset(struct pp_usb_data *p)
{
- devrequest dr;
struct usb_device *dev = p->pusb_dev;
- dr.requesttype = USB_TYPE_CLASS | USB_RECIP_OTHER;
- dr.request = 2;
- dr.value = 0;
- dr.index = 0;
- dr.length = 0;
- dev->bus->op->control_msg(dev, usb_sndctrlpipe(dev,0), &dr, NULL, 0, HZ);
+ usb_control_msg(dev, usb_sndctrlpipe(dev,0),
+ USB_PRINTER_REQ_SOFT_RESET,
+ USB_TYPE_CLASS | USB_RECIP_OTHER,
+ 0, 0, NULL, 0, HZ);
}
static int open_printer(struct inode * inode, struct file * file)
{
struct pp_usb_data *p;
- if(MINOR(inode->i_rdev) >= MAX_PRINTERS ||
+ if (MINOR(inode->i_rdev) >= MAX_PRINTERS ||
!minor_data[MINOR(inode->i_rdev)]) {
return -ENODEV;
}
p->isopen = 0;
file->private_data = NULL;
/* free the resources if the printer is no longer around */
- if(!p->pusb_dev) {
+ if (!p->pusb_dev) {
minor_data[p->minor] = NULL;
kfree(p);
}
unsigned long partial;
int result = USB_ST_NOERROR;
int maxretry;
- int endpoint_num;
- struct usb_interface_descriptor *interface;
- interface = p->pusb_dev->config->interface->altsetting;
- endpoint_num = (interface->endpoint[1].bEndpointAddress & 0x0f);
-
do {
char *obuf = p->obuf;
unsigned long thistime;
return bytes_written ? bytes_written : -EINTR;
}
result = p->pusb_dev->bus->op->bulk_msg(p->pusb_dev,
- usb_sndbulkpipe(p->pusb_dev, endpoint_num),
+ usb_sndbulkpipe(p->pusb_dev, p->bulk_out_ep),
obuf, thistime, &partial, HZ*20);
if (partial) {
obuf += partial;
maxretry = MAX_RETRY_COUNT;
}
if (result == USB_ST_TIMEOUT) { /* NAK - so hold for a while */
- if(!maxretry--)
+ if (!maxretry--)
return -ETIME;
interruptible_sleep_on_timeout(&p->wait_q, NAK_TIMEOUT);
continue;
char * buffer, size_t count, loff_t *ppos)
{
struct pp_usb_data *p = file->private_data;
- int read_count;
+ int read_count = 0;
int this_read;
char buf[64];
unsigned long partial;
int result;
- int endpoint_num;
- struct usb_interface_descriptor *interface;
- interface = p->pusb_dev->config->interface->altsetting;
- endpoint_num = (interface->endpoint[0].bEndpointAddress & 0x0f);
-
if (p->noinput)
return -EINVAL;
- read_count = 0;
while (count) {
if (signal_pending(current)) {
return read_count ? read_count : -EINTR;
this_read = (count > sizeof(buf)) ? sizeof(buf) : count;
result = p->pusb_dev->bus->op->bulk_msg(p->pusb_dev,
- usb_rcvbulkpipe(p->pusb_dev, endpoint_num),
+ usb_rcvbulkpipe(p->pusb_dev, p->bulk_in_ep),
buf, this_read, &partial, HZ*20);
/* unlike writes, we don't retry a NAK, just stop now */
/*
* FIXME - this will not cope with combined printer/scanners
*/
- if ((dev->descriptor.bDeviceClass != 7 &&
- dev->descriptor.bDeviceClass != 0) ||
+ if ((dev->descriptor.bDeviceClass != USB_CLASS_PRINTER &&
+ dev->descriptor.bDeviceClass != 0) ||
dev->descriptor.bNumConfigurations != 1 ||
dev->config[0].bNumInterfaces != 1) {
return -1;
interface = &dev->config[0].interface[0].altsetting[0];
- /* Lets be paranoid (for the moment)*/
- if (interface->bInterfaceClass != 7 ||
+ /* Let's be paranoid (for the moment). */
+ if (interface->bInterfaceClass != USB_CLASS_PRINTER ||
interface->bInterfaceSubClass != 1 ||
- (interface->bInterfaceProtocol != 2 && interface->bInterfaceProtocol != 1)||
+ (interface->bInterfaceProtocol != 2 && interface->bInterfaceProtocol != 1) ||
interface->bNumEndpoints > 2) {
return -1;
}
- if ((interface->endpoint[0].bEndpointAddress & 0xf0) != 0x00 ||
- interface->endpoint[0].bmAttributes != 0x02 ||
- (interface->bNumEndpoints > 1 && (
- (interface->endpoint[1].bEndpointAddress & 0xf0) != 0x80 ||
- interface->endpoint[1].bmAttributes != 0x02))) {
+ /* Does this (these) interface(s) support bulk transfers? */
+ if ((interface->endpoint[0].bmAttributes & USB_ENDPOINT_XFERTYPE_MASK)
+ != USB_ENDPOINT_XFER_BULK) {
return -1;
}
+ if ((interface->bNumEndpoints > 1) &&
+ ((interface->endpoint[1].bmAttributes & USB_ENDPOINT_XFERTYPE_MASK)
+ != USB_ENDPOINT_XFER_BULK)) {
+ return -1;
+ }
+
+ /*
+ * Does this interface have at least one OUT endpoint
+ * that we can write to: endpoint index 0 or 1?
+ */
+	if ((interface->endpoint[0].bEndpointAddress & USB_ENDPOINT_DIR_MASK)
+	    != USB_DIR_OUT &&
+	    (interface->bNumEndpoints < 2 ||
+	     (interface->endpoint[1].bEndpointAddress & USB_ENDPOINT_DIR_MASK)
+	     != USB_DIR_OUT)) {
+		return -1;
+	}
for (i=0; i<MAX_PRINTERS; i++) {
if (!minor_data[i])
break;
}
if (i >= MAX_PRINTERS) {
+		printk(KERN_ERR "No minor table space available for USB Printer\n");
return -1;
}
printk(KERN_INFO "USB Printer found at address %d\n", dev->devnum);
if (!(dev->private = kmalloc(sizeof(struct pp_usb_data), GFP_KERNEL))) {
- printk( KERN_DEBUG "usb_printer: no memory!\n");
+ printk(KERN_DEBUG "usb_printer: no memory!\n");
return -1;
}
minor_data[i] = PPDATA(dev->private);
minor_data[i]->minor = i;
minor_data[i]->pusb_dev = dev;
- /* The max packet size can't be more than 64 (& will be 64 for
- * any decent bulk device); this calculation was silly. -greg
- * minor_data[i]->maxout = interface->endpoint[0].wMaxPacketSize * 16;
- */
- minor_data[i]->maxout = 8192;
- if (minor_data[i]->maxout > PAGE_SIZE) {
- minor_data[i]->maxout = PAGE_SIZE;
- }
- if (interface->bInterfaceProtocol != 2)
+ minor_data[i]->maxout = (BIG_BUF_SIZE > PAGE_SIZE) ? PAGE_SIZE : BIG_BUF_SIZE;
+ if (interface->bInterfaceProtocol != 2) /* if not bidirectional */
minor_data[i]->noinput = 1;
- else {
- minor_data[i]->maxin = interface->endpoint[1].wMaxPacketSize;
+
+ minor_data[i]->bulk_out_index =
+ ((interface->endpoint[0].bEndpointAddress & USB_ENDPOINT_DIR_MASK)
+ == USB_DIR_OUT) ? 0 : 1;
+ minor_data[i]->bulk_in_index = minor_data[i]->noinput ? -1 :
+ (minor_data[i]->bulk_out_index == 0) ? 1 : 0;
+ minor_data[i]->bulk_in_ep = minor_data[i]->noinput ? -1 :
+ interface->endpoint[minor_data[i]->bulk_in_index].bEndpointAddress &
+ USB_ENDPOINT_NUMBER_MASK;
+ minor_data[i]->bulk_out_ep =
+ interface->endpoint[minor_data[i]->bulk_out_index].bEndpointAddress &
+ USB_ENDPOINT_NUMBER_MASK;
+ if (interface->bInterfaceProtocol == 2) { /* if bidirectional */
+ minor_data[i]->maxin =
+ interface->endpoint[minor_data[i]->bulk_in_index].wMaxPacketSize;
}
if (usb_set_configuration(dev, dev->config[0].bConfigurationValue)) {
printk(KERN_INFO " Failed usb_set_configuration: printer\n");
return -1;
}
+
+ printk(KERN_INFO "USB Printer Summary:\n");
+ printk(KERN_INFO "index=%d, maxout=%d, noinput=%d\n",
+ i, minor_data[i]->maxout, minor_data[i]->noinput);
+ printk(KERN_INFO "bulk_in_ix=%d, bulk_in_ep=%d, bulk_out_ix=%d, bulk_out_ep=%d\n",
+ minor_data[i]->bulk_in_index,
+ minor_data[i]->bulk_in_ep,
+ minor_data[i]->bulk_out_index,
+ minor_data[i]->bulk_out_ep);
+
#if 0
{
__u8 status;
__u8 ieee_id[64];
- devrequest dr;
-
- /* Lets get the device id if possible */
- dr.requesttype = USB_TYPE_CLASS | USB_RT_INTERFACE | 0x80;
- dr.request = 0;
- dr.value = 0;
- dr.index = 0;
- dr.length = sizeof(ieee_id) - 1;
- if (dev->bus->op->control_msg(dev, usb_rcvctrlpipe(dev,0), &dr, ieee_id, sizeof(ieee_id)-1, HZ) == 0) {
+
+ /* Let's get the device id if possible. */
+ if (usb_control_msg(dev, usb_rcvctrlpipe(dev,0),
+ USB_PRINTER_REQ_GET_DEVICE_ID,
+ USB_TYPE_CLASS | USB_RT_INTERFACE | USB_DIR_IN,
+ 0, 0, ieee_id,
+ sizeof(ieee_id)-1, HZ) == 0) {
if (ieee_id[1] < sizeof(ieee_id) - 1)
ieee_id[ieee_id[1]+2] = '\0';
else
ieee_id[sizeof(ieee_id)-1] = '\0';
- printk(KERN_INFO " Printer ID is %s\n", &ieee_id[2]);
+ printk(KERN_INFO " USB Printer ID is %s\n",
+ &ieee_id[2]);
}
status = printer_read_status(PPDATA(dev->private));
printk(KERN_INFO " Status is %s,%s,%s\n",
- (status & 0x10) ? "Selected" : "Not Selected",
- (status & 0x20) ? "No Paper" : "Paper",
- (status & 0x08) ? "No Error" : "Error");
+ (status & LP_PSELECD) ? "Selected" : "Not Selected",
+ (status & LP_POUTPA) ? "No Paper" : "Paper",
+ (status & LP_PERRORP) ? "No Error" : "Error");
}
#endif
return 0;
int usb_printer_init(void)
{
- usb_register(&printer_driver);
+ if (usb_register(&printer_driver)) {
+ printk(KERN_ERR "USB Printer driver cannot register: "
+ "minor number %d already in use\n",
+ printer_driver.minor);
+ return 1;
+ }
+
printk(KERN_INFO "USB Printer support registered.\n");
return 0;
}
static int grow_buffers(int size);
/* This is used by some architectures to estimate available memory. */
-atomic_t buffermem = ATOMIC_INIT(0);
+atomic_t buffermem_pages = ATOMIC_INIT(0);
/* Here is the parameter block for the bdflush process. If you add or
* remove any of the parameters, make sure to update kernel/sysctl.c.
unsigned long dirty, tot, hard_dirty_limit, soft_dirty_limit;
dirty = size_buffers_type[BUF_DIRTY] >> PAGE_SHIFT;
- tot = nr_lru_pages + nr_free_pages - nr_free_bigpages;
+ tot = nr_lru_pages + nr_free_pages + nr_free_highpages;
hard_dirty_limit = tot * bdf_prm.b_un.nfract / 100;
soft_dirty_limit = hard_dirty_limit >> 1;
*/
if (!offset) {
if (!try_to_free_buffers(page)) {
- atomic_add(PAGE_CACHE_SIZE, &buffermem);
+ atomic_inc(&buffermem_pages);
return 0;
}
}
dprintk ("iobuf %d %d %d\n", offset, length, size);
for (pageind = 0; pageind < iobuf->nr_pages; pageind++) {
- page = iobuf->pagelist[pageind];
map = iobuf->maplist[pageind];
- if (map && PageBIGMEM(map)) {
+ if (map && PageHighMem(map)) {
err = -EIO;
goto error;
}
+ page = page_address(map);
while (length > 0) {
blocknr = b[bufind++];
page_map = mem_map + MAP_NR(page);
page_map->buffers = bh;
lru_cache_add(page_map);
- atomic_add(PAGE_SIZE, &buffermem);
+ atomic_inc(&buffermem_pages);
return 1;
no_buffer_head:
int nlist;
static char *buf_types[NR_LIST] = { "CLEAN", "LOCKED", "DIRTY" };
- printk("Buffer memory: %6dkB\n", atomic_read(&buffermem) >> 10);
+ printk("Buffer memory: %6dkB\n",
+ atomic_read(&buffermem_pages) << (PAGE_SHIFT-10));
#ifdef __SMP__ /* trylock does nothing on UP and so we could deadlock */
if (!spin_trylock(&lru_list_lock))
* Use gfp() for the hash table to decrease TLB misses, use
* SLAB cache for buffer heads.
*/
-void __init buffer_init(unsigned long memory_size)
+void __init buffer_init(unsigned long mempages)
{
int order, i;
unsigned int nr_hash;
/* The buffer cache hash table is less important these days,
* trim it a bit.
*/
- memory_size >>= 14;
- memory_size *= sizeof(struct buffer_head *);
- for (order = 0; (PAGE_SIZE << order) < memory_size; order++)
+ mempages >>= 14;
+
+ mempages *= sizeof(struct buffer_head *);
+
+ for (order = 0; (1 << order) < mempages; order++)
;
/* try to allocate something until we get it or we're asking
unlock_kernel();
/* FIXME: kmem_cache_shrink here should tell us
the number of pages freed, and it should
- work in a __GFP_DMA/__GFP_BIGMEM behaviour
+ work in a __GFP_DMA/__GFP_HIGHMEM behaviour
to free only the interesting pages in
function of the needs of the current allocation. */
kmem_cache_shrink(dentry_cache);
#include <linux/fcntl.h>
#include <linux/smp_lock.h>
#include <linux/init.h>
+#include <linux/pagemap.h>
+#include <linux/highmem.h>
#include <asm/uaccess.h>
#include <asm/pgtable.h>
/* XXX: add architecture specific overflow check here. */
pos = bprm->p;
- while (len>0) {
- char *pag;
+ while (len > 0) {
+ char *kaddr;
+ int i, new, err;
+ struct page *page;
int offset, bytes_to_copy;
offset = pos % PAGE_SIZE;
- if (!(pag = (char *) bprm->page[pos/PAGE_SIZE]) &&
- !(pag = (char *) bprm->page[pos/PAGE_SIZE] =
- (unsigned long *) get_free_page(GFP_USER)))
- return -ENOMEM;
+ i = pos/PAGE_SIZE;
+ page = bprm->page[i];
+ new = 0;
+ if (!page) {
+ /*
+ * Cannot yet use highmem page because
+ * we cannot sleep with a kmap held.
+ */
+ page = __get_pages(GFP_USER, 0);
+ bprm->page[i] = page;
+ if (!page)
+ return -ENOMEM;
+ new = 1;
+ }
+ kaddr = (char *)kmap(page, KM_WRITE);
+ if (new && offset)
+ memset(kaddr, 0, offset);
bytes_to_copy = PAGE_SIZE - offset;
- if (bytes_to_copy > len)
+ if (bytes_to_copy > len) {
bytes_to_copy = len;
- if (copy_from_user(pag + offset, str, bytes_to_copy))
+ if (new)
+ memset(kaddr+offset+len, 0, PAGE_SIZE-offset-len);
+ }
+ err = copy_from_user(kaddr + offset, str, bytes_to_copy);
+ flush_page_to_ram(kaddr);
+ kunmap((unsigned long)kaddr, KM_WRITE);
+
+ if (err)
return -EFAULT;
pos += bytes_to_copy;
{
if (bprm->argc) {
unsigned long offset;
- char * page;
+ char * kaddr;
+ struct page *page;
+
offset = bprm->p % PAGE_SIZE;
- page = (char*)bprm->page[bprm->p/PAGE_SIZE];
- while(bprm->p++,*(page+offset++))
- if(offset==PAGE_SIZE){
- offset=0;
- page = (char*)bprm->page[bprm->p/PAGE_SIZE];
- }
+ goto inside;
+
+ while (bprm->p++, *(kaddr+offset++)) {
+ if (offset != PAGE_SIZE)
+ continue;
+ offset = 0;
+ kunmap((unsigned long)kaddr, KM_WRITE);
+inside:
+ page = bprm->page[bprm->p/PAGE_SIZE];
+ kaddr = (char *)kmap(page, KM_WRITE);
+ }
+ kunmap((unsigned long)kaddr, KM_WRITE);
bprm->argc--;
}
}
bprm->dentry = NULL;
bprm_loader.p = PAGE_SIZE*MAX_ARG_PAGES-sizeof(void *);
- for (i=0 ; i<MAX_ARG_PAGES ; i++) /* clear page-table */
- bprm_loader.page[i] = 0;
+ for (i = 0 ; i < MAX_ARG_PAGES ; i++) /* clear page-table */
+ bprm_loader.page[i] = NULL;
dentry = open_namei(dynloader[0], 0, 0);
retval = PTR_ERR(dentry);
/* Assumes that free_page() can take a NULL argument. */
/* I hope this is ok for all architectures */
- for (i=0 ; i<MAX_ARG_PAGES ; i++)
- free_page(bprm.page[i]);
+ for (i = 0 ; i < MAX_ARG_PAGES ; i++)
+ if (bprm.page[i])
+ __free_page(bprm.page[i]);
return retval;
}
/*
- * Allocate an fd array, using get_free_page() if possible.
+ * Allocate an fd array, using __get_free_page() if possible.
* Note: the array isn't cleared at allocation time.
*/
struct file ** alloc_fd_array(int num)
}
/*
- * Allocate an fdset array, using get_free_page() if possible.
+ * Allocate an fdset array, using __get_free_page() if possible.
* Note: the array isn't cleared at allocation time.
*/
fd_set * alloc_fdset(int num)
memset(inode, 0, sizeof(*inode));
init_waitqueue_head(&inode->i_wait);
INIT_LIST_HEAD(&inode->i_hash);
+ INIT_LIST_HEAD(&inode->i_pages);
INIT_LIST_HEAD(&inode->i_dentry);
sema_init(&inode->i_sem, 1);
spin_lock_init(&inode->i_shared_lock);
prune_icache(count);
/* FIXME: kmem_cache_shrink here should tell us
the number of pages freed, and it should
- work in a __GFP_DMA/__GFP_BIGMEM behaviour
+ work in a __GFP_DMA/__GFP_HIGHMEM behaviour
to free only the interesting pages in
function of the needs of the current allocation. */
kmem_cache_shrink(inode_cachep);
init_waitqueue_head(&iobuf->wait_queue);
iobuf->end_io = simple_wakeup_kiobuf;
iobuf->array_len = KIO_STATIC_PAGES;
- iobuf->pagelist = iobuf->page_array;
iobuf->maplist = iobuf->map_array;
*bufp++ = iobuf;
}
for (i = 0; i < nr; i++) {
iobuf = bufp[i];
- if (iobuf->array_len > KIO_STATIC_PAGES) {
- kfree (iobuf->pagelist);
+ if (iobuf->array_len > KIO_STATIC_PAGES)
kfree (iobuf->maplist);
- }
kmem_cache_free(kiobuf_cachep, bufp[i]);
}
}
int expand_kiobuf(struct kiobuf *iobuf, int wanted)
{
- unsigned long * pagelist;
struct page ** maplist;
if (iobuf->array_len >= wanted)
return 0;
- pagelist = (unsigned long *)
- kmalloc(wanted * sizeof(unsigned long), GFP_KERNEL);
- if (!pagelist)
- return -ENOMEM;
-
maplist = (struct page **)
kmalloc(wanted * sizeof(struct page **), GFP_KERNEL);
- if (!maplist) {
- kfree(pagelist);
+ if (!maplist)
return -ENOMEM;
- }
/* Did it grow while we waited? */
if (iobuf->array_len >= wanted) {
- kfree(pagelist);
kfree(maplist);
return 0;
}
- memcpy (pagelist, iobuf->pagelist, wanted * sizeof(unsigned long));
	memcpy (maplist, iobuf->maplist, iobuf->array_len * sizeof(struct page **));
- if (iobuf->array_len > KIO_STATIC_PAGES) {
- kfree (iobuf->pagelist);
+ if (iobuf->array_len > KIO_STATIC_PAGES)
kfree (iobuf->maplist);
- }
- iobuf->pagelist = pagelist;
iobuf->maplist = maplist;
iobuf->array_len = wanted;
return 0;
struct nfs_readdirres rd_res;
struct dentry *dentry = file->f_dentry;
struct inode *inode = dentry->d_inode;
- struct page *page, **hash;
- unsigned long page_cache;
+ struct page *page, **hash, *page_cache;
long offset;
__u32 *cookiep;
goto unlock_out;
}
- page = page_cache_entry(page_cache);
+ page = page_cache;
if (add_to_page_cache_unique(page, inode, offset, hash)) {
page_cache_release(page);
goto repeat;
}
rd_args.fh = NFS_FH(dentry);
- rd_res.buffer = (char *)page_cache;
+ rd_res.buffer = (char *)page_address(page_cache);
rd_res.bufsiz = PAGE_CACHE_SIZE;
rd_res.cookie = *cookiep;
do {
static struct page *try_to_get_symlink_page(struct dentry *dentry, struct inode *inode)
{
struct nfs_readlinkargs rl_args;
- struct page *page, **hash;
- unsigned long page_cache;
+ struct page *page, **hash, *page_cache;
page = NULL;
page_cache = page_cache_alloc();
goto unlock_out;
}
- page = page_cache_entry(page_cache);
+ page = page_cache;
if (add_to_page_cache_unique(page, inode, 0, hash)) {
page_cache_release(page);
goto repeat;
* XDR response verification will NULL terminate it.
*/
rl_args.fh = NFS_FH(dentry);
- rl_args.buffer = (const void *)page_cache;
+ rl_args.buffer = (const void *)page_address(page_cache);
if (rpc_call(NFS_CLIENT(inode), NFSPROC_READLINK,
&rl_args, NULL, 0) < 0)
goto error;
i.sharedram >> 10,
i.bufferram >> 10,
atomic_read(&page_cache_size) << (PAGE_SHIFT - 10),
- i.totalbig >> 10,
- i.freebig >> 10,
+ i.totalhigh >> 10,
+ i.freehigh >> 10,
i.totalswap >> 10,
i.freeswap >> 10);
}
return sprintf(buffer, "%s\n", saved_command_line);
}
-static unsigned long get_phys_addr(struct mm_struct * mm, unsigned long ptr)
+static struct page * get_phys_page(struct mm_struct * mm, unsigned long ptr)
{
pgd_t *page_dir;
pmd_t *page_middle;
pte = *pte_offset(page_middle,ptr);
if (!pte_present(pte))
return 0;
- return pte_page(pte) + (ptr & ~PAGE_MASK);
+ return pte_page(pte);
}
-#include <linux/bigmem.h>
+#include <linux/highmem.h>
static int get_array(struct mm_struct *mm, unsigned long start, unsigned long end, char * buffer)
{
unsigned long addr;
int size = 0, result = 0;
- char c;
+ char *buf, c;
if (start >= end)
return result;
for (;;) {
- addr = get_phys_addr(mm, start);
- if (!addr)
+ struct page *page = get_phys_page(mm, start);
+ if (!page)
return result;
- addr = kmap(addr, KM_READ);
+ addr = kmap(page, KM_READ);
+ buf = (char *) (addr + (start & ~PAGE_MASK));
do {
- c = *(char *) addr;
+ c = *buf;
if (!c)
result = size;
- if (size < PAGE_SIZE)
- buffer[size++] = c;
- else {
+ if (size >= PAGE_SIZE) {
kunmap(addr, KM_READ);
return result;
}
- addr++;
+ buffer[size++] = c;
+ buf++;
start++;
if (!c && start >= end) {
kunmap(addr, KM_READ);
return result;
}
- } while (addr & ~PAGE_MASK);
+ } while (~PAGE_MASK & (unsigned long)buf);
kunmap(addr, KM_READ);
}
return result;
#include <linux/kernel.h>
#include <linux/mm.h>
#include <linux/proc_fs.h>
-#include <linux/bigmem.h>
+#include <linux/highmem.h>
#include <asm/page.h>
#include <asm/uaccess.h>
pgd_t *page_dir;
pmd_t *page_middle;
pte_t pte;
- char * page;
+ struct page * page;
struct task_struct * tsk;
unsigned long addr;
+ unsigned long maddr; /* temporary mapped address */
char *tmp;
ssize_t scount, i;
if (pgd_none(*page_dir))
break;
if (pgd_bad(*page_dir)) {
- printk("Bad page dir entry %08lx\n", pgd_val(*page_dir));
+ pgd_ERROR(*page_dir);
pgd_clear(page_dir);
break;
}
if (pmd_none(*page_middle))
break;
if (pmd_bad(*page_middle)) {
- printk("Bad page middle entry %08lx\n", pmd_val(*page_middle));
+ pmd_ERROR(*page_middle);
pmd_clear(page_middle);
break;
}
pte = *pte_offset(page_middle,addr);
if (!pte_present(pte))
break;
- page = (char *) pte_page(pte) + (addr & ~PAGE_MASK);
+ page = pte_page(pte);
i = PAGE_SIZE-(addr & ~PAGE_MASK);
if (i > scount)
i = scount;
- page = (char *) kmap((unsigned long) page, KM_READ);
- copy_to_user(tmp, page, i);
- kunmap((unsigned long) page, KM_READ);
+ maddr = kmap(page, KM_READ);
+ copy_to_user(tmp, (char *)maddr + (addr & ~PAGE_MASK), i);
+ kunmap(maddr, KM_READ);
addr += i;
tmp += i;
scount -= i;
pgd_t *page_dir;
pmd_t *page_middle;
pte_t pte;
- char * page;
+ struct page * page;
struct task_struct * tsk;
unsigned long addr;
+ unsigned long maddr; /* temporary mapped address */
char *tmp;
long i;
if (pgd_none(*page_dir))
break;
if (pgd_bad(*page_dir)) {
- printk("Bad page dir entry %08lx\n", pgd_val(*page_dir));
+ pgd_ERROR(*page_dir);
pgd_clear(page_dir);
break;
}
if (pmd_none(*page_middle))
break;
if (pmd_bad(*page_middle)) {
- printk("Bad page middle entry %08lx\n", pmd_val(*page_middle));
+ pmd_ERROR(*page_middle);
pmd_clear(page_middle);
break;
}
break;
if (!pte_write(pte))
break;
- page = (char *) pte_page(pte) + (addr & ~PAGE_MASK);
+ page = pte_page(pte);
i = PAGE_SIZE-(addr & ~PAGE_MASK);
if (i > count)
i = count;
- page = (unsigned long) kmap((unsigned long) page, KM_WRITE);
- copy_from_user(page, tmp, i);
- kunmap((unsigned long) page, KM_WRITE);
+ maddr = kmap(page, KM_WRITE);
+ copy_from_user((char *)maddr + (addr & ~PAGE_MASK), tmp, i);
+ kunmap(maddr, KM_WRITE);
addr += i;
tmp += i;
count -= i;
if (pgd_none(*src_dir))
return -EINVAL;
if (pgd_bad(*src_dir)) {
- printk("Bad source page dir entry %08lx\n", pgd_val(*src_dir));
+ pgd_ERROR(*src_dir);
return -EINVAL;
}
src_middle = pmd_offset(src_dir, stmp);
if (pmd_none(*src_middle))
return -EINVAL;
if (pmd_bad(*src_middle)) {
- printk("Bad source page middle entry %08lx\n", pmd_val(*src_middle));
+ pmd_ERROR(*src_middle);
return -EINVAL;
}
src_table = pte_offset(src_middle, stmp);
set_pte(src_table, pte_mkdirty(*src_table));
set_pte(dest_table, *src_table);
- mapnr = MAP_NR(pte_page(*src_table));
+ mapnr = pte_pagenr(*src_table);
if (mapnr < max_mapnr)
- get_page(mem_map + MAP_NR(pte_page(*src_table)));
+ get_page(mem_map + pte_pagenr(*src_table));
stmp += PAGE_SIZE;
dtmp += PAGE_SIZE;
+++ /dev/null
-/*
- * bigmem.h: virtual kernel memory mappings for big memory
- *
- * Used in CONFIG_BIGMEM systems for memory pages which are not
- * addressable by direct kernel virtual adresses.
- *
- * Copyright (C) 1999 Gerhard Wichert, Siemens AG
- * Gerhard.Wichert@pdb.siemens.de
- */
-
-#ifndef _ASM_BIGMEM_H
-#define _ASM_BIGMEM_H
-
-#include <linux/init.h>
-
-#define BIGMEM_DEBUG /* undef for production */
-
-/* declarations for bigmem.c */
-extern unsigned long bigmem_start, bigmem_end;
-extern int nr_free_bigpages;
-
-extern pte_t *kmap_pte;
-extern pgprot_t kmap_prot;
-
-extern void kmap_init(void) __init;
-
-/* kmap helper functions necessary to access the bigmem pages in kernel */
-#include <asm/pgtable.h>
-#include <asm/kmap_types.h>
-
-extern inline unsigned long kmap(unsigned long kaddr, enum km_type type)
-{
- if (__pa(kaddr) < bigmem_start)
- return kaddr;
- {
- enum fixed_addresses idx = type+KM_TYPE_NR*smp_processor_id();
- unsigned long vaddr = __fix_to_virt(FIX_KMAP_BEGIN+idx);
-
-#ifdef BIGMEM_DEBUG
- if (!pte_none(*(kmap_pte-idx)))
- {
- __label__ here;
- here:
- printk(KERN_ERR "not null pte on CPU %d from %p\n",
- smp_processor_id(), &&here);
- }
-#endif
- set_pte(kmap_pte-idx, mk_pte(kaddr & PAGE_MASK, kmap_prot));
- __flush_tlb_one(vaddr);
-
- return vaddr | (kaddr & ~PAGE_MASK);
- }
-}
-
-extern inline void kunmap(unsigned long vaddr, enum km_type type)
-{
-#ifdef BIGMEM_DEBUG
- enum fixed_addresses idx = type+KM_TYPE_NR*smp_processor_id();
- if ((vaddr & PAGE_MASK) == __fix_to_virt(FIX_KMAP_BEGIN+idx))
- {
- /* force other mappings to Oops if they'll try to access
- this pte without first remap it */
- pte_clear(kmap_pte-idx);
- __flush_tlb_one(vaddr);
- }
-#endif
-}
-
-#endif /* _ASM_BIGMEM_H */
* have the F0 0F bug, which lets nonpriviledged users lock up the system:
*/
+#ifndef CONFIG_M686
extern void trap_init_f00f_bug(void);
static void __init check_pentium_f00f(void)
trap_init_f00f_bug();
}
}
+#endif
/*
* Perform the Cyrix 5/2 test. A Cyrix won't change
check_hlt();
check_popad();
check_amd_k6();
+#ifndef CONFIG_M686
check_pentium_f00f();
+#endif
check_cyrix_coma();
system_utsname.machine[1] = '0' + boot_cpu_data.x86;
}
#include <linux/kernel.h>
#include <asm/apic.h>
#include <asm/page.h>
-#ifdef CONFIG_BIGMEM
+#ifdef CONFIG_HIGHMEM
#include <linux/threads.h>
#include <asm/kmap_types.h>
#endif
*
* these 'compile-time allocated' memory buffers are
* fixed-size 4k pages. (or larger if used with an increment
 * bigger than 1) use fixmap_set(idx,phys) to associate
* physical memory with fixmap indices.
*
* TLB entries of such buffers will not be flushed across
FIX_LI_PCIA, /* Lithium PCI Bridge A */
FIX_LI_PCIB, /* Lithium PCI Bridge B */
#endif
-#ifdef CONFIG_BIGMEM
+#ifdef CONFIG_HIGHMEM
FIX_KMAP_BEGIN, /* reserved pte's for temporary kernel mappings */
FIX_KMAP_END = FIX_KMAP_BEGIN+(KM_TYPE_NR*NR_CPUS)-1,
#endif
--- /dev/null
+/*
+ * highmem.h: virtual kernel memory mappings for high memory
+ *
+ * Used in CONFIG_HIGHMEM systems for memory pages which
+ * are not addressable by direct kernel virtual addresses.
+ *
+ * Copyright (C) 1999 Gerhard Wichert, Siemens AG
+ * Gerhard.Wichert@pdb.siemens.de
+ *
+ *
+ * Redesigned the x86 32-bit VM architecture to deal with
+ * up to 16 Terabytes of physical memory. With current x86 CPUs
+ * we now support up to 64 Gigabytes of physical RAM.
+ *
+ * Copyright (C) 1999 Ingo Molnar <mingo@redhat.com>
+ */
+
+#ifndef _ASM_HIGHMEM_H
+#define _ASM_HIGHMEM_H
+
+#include <linux/init.h>
+
+/* undef for production */
+#define HIGHMEM_DEBUG 1
+
+/* declarations for highmem.c */
+extern unsigned long highstart_pfn, highend_pfn;
+
+extern pte_t *kmap_pte;
+extern pgprot_t kmap_prot;
+
+extern void kmap_init(void) __init;
+
+/* kmap helper functions necessary to access the highmem pages in kernel */
+#include <asm/pgtable.h>
+#include <asm/kmap_types.h>
+
+extern inline unsigned long kmap(struct page *page, enum km_type type)
+{
+ if (page < highmem_start_page)
+ return page_address(page);
+ {
+ enum fixed_addresses idx = type+KM_TYPE_NR*smp_processor_id();
+ unsigned long vaddr = __fix_to_virt(FIX_KMAP_BEGIN+idx);
+
+#if HIGHMEM_DEBUG
+ if (!pte_none(*(kmap_pte-idx)))
+ {
+ __label__ here;
+ here:
+ printk(KERN_ERR "not null pte on CPU %d from %p\n",
+ smp_processor_id(), &&here);
+ }
+#endif
+ set_pte(kmap_pte-idx, mk_pte(page, kmap_prot));
+ __flush_tlb_one(vaddr);
+
+ return vaddr;
+ }
+}
+
+extern inline void kunmap(unsigned long vaddr, enum km_type type)
+{
+#if HIGHMEM_DEBUG
+ enum fixed_addresses idx = type+KM_TYPE_NR*smp_processor_id();
+ if ((vaddr & PAGE_MASK) == __fix_to_virt(FIX_KMAP_BEGIN+idx))
+ {
+ /* force other mappings to Oops if they'll try to access
+ this pte without first remap it */
+ pte_clear(kmap_pte-idx);
+ __flush_tlb_one(vaddr);
+ }
+#endif
+}
+
+extern inline void kmap_check(void)
+{
+#if HIGHMEM_DEBUG
+ int idx_base = KM_TYPE_NR*smp_processor_id(), i;
+ for (i = idx_base; i < idx_base+KM_TYPE_NR; i++)
+ if (!pte_none(*(kmap_pte-i)))
+ BUG();
+#endif
+}
+#endif /* _ASM_HIGHMEM_H */
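Illustration (not part of the patch): a sketch of how callers are expected to pair kmap()/kunmap() when touching a page that may live in high memory. The function and buffer names are hypothetical; KM_READ comes from <asm/kmap_types.h>, and the caller must not sleep while the temporary mapping is held.

	/* copy 'len' bytes (len <= PAGE_SIZE) out of a possibly-highmem page */
	static void peek_page(struct page *page, char *buf, int len)
	{
		unsigned long vaddr = kmap(page, KM_READ);

		memcpy(buf, (char *)vaddr, len);
		kunmap(vaddr, KM_READ);		/* tear down the temporary pte */
	}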
#include <linux/vmalloc.h>
#include <asm/page.h>
-#define __io_virt(x) ((void *)(PAGE_OFFSET | (unsigned long)(x)))
-#define __io_phys(x) ((unsigned long)(x) & ~PAGE_OFFSET)
+/*
+ * Temporary debugging check to catch old code using
+ * unmapped ISA addresses. Will be removed in 2.4.
+ */
+#define __io_virt(x) ((unsigned long)(x) < PAGE_OFFSET ? \
+ ({ __label__ __l; __l: printk("io mapaddr %p not valid at %p!\n", (char *)(x), &&__l); __va(x); }) : (char *)(x))
+#define __io_phys(x) ((unsigned long)(x) < PAGE_OFFSET ? \
+ ({ __label__ __l; __l: printk("io mapaddr %p not valid at %p!\n", (char *)(x), &&__l); (unsigned long)(x); }) : __pa(x))
+
/*
* Change virtual addresses to physical addresses and vv.
* These are pretty trivial
*/
extern inline unsigned long virt_to_phys(volatile void * address)
{
-#ifdef CONFIG_BIGMEM
return __pa(address);
-#else
- return __io_phys(address);
-#endif
}
extern inline void * phys_to_virt(unsigned long address)
{
-#ifdef CONFIG_BIGMEM
return __va(address);
-#else
- return __io_virt(address);
-#endif
}
extern void * __ioremap(unsigned long offset, unsigned long size, unsigned long flags);
#define memcpy_fromio(a,b,c) memcpy((a),__io_virt(b),(c))
#define memcpy_toio(a,b,c) memcpy(__io_virt(a),(b),(c))
+/*
+ * ISA space is 'always mapped' on a typical x86 system, no need to
+ * explicitly ioremap() it. The fact that the ISA IO space is mapped
+ * to PAGE_OFFSET is pure coincidence - it does not mean ISA values
+ * are physical addresses. The following constant pointer can be
+ * used as the IO-area pointer (it can be iounmapped as well, so the
+ * analogy with PCI is quite close):
+ */
+#define __ISA_IO_base ((char *)(PAGE_OFFSET))
+
+#define isa_readb(a) readb(__ISA_IO_base + (a))
+#define isa_readw(a) readw(__ISA_IO_base + (a))
+#define isa_readl(a) readl(__ISA_IO_base + (a))
+#define isa_writeb(b,a) writeb(b,__ISA_IO_base + (a))
+#define isa_writew(w,a) writew(w,__ISA_IO_base + (a))
+#define isa_writel(l,a) writel(l,__ISA_IO_base + (a))
+
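Illustration (not part of the patch): the VGA text buffer lives in ISA space, so it can be poked without an ioremap(); the 0xB8000 offsets below are examples only.

	isa_writeb('A', 0xB8000);	/* character cell */
	isa_writeb(0x07, 0xB8001);	/* grey-on-black attribute */
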
/*
* Again, i386 does not require mem IO specific function.
*/
#ifdef __KERNEL__
#ifndef __ASSEMBLY__
-#define STRICT_MM_TYPECHECKS
-
#include <linux/config.h>
#ifdef CONFIG_X86_USE_3DNOW
#endif
-#ifdef STRICT_MM_TYPECHECKS
/*
* These are used to make use of C type-checking..
*/
+#if CONFIG_X86_PAE
+typedef struct { unsigned long long pte; } pte_t;
+typedef struct { unsigned long long pmd; } pmd_t;
+typedef struct { unsigned long long pgd; } pgd_t;
+#else
typedef struct { unsigned long pte; } pte_t;
typedef struct { unsigned long pmd; } pmd_t;
typedef struct { unsigned long pgd; } pgd_t;
+#endif
+
typedef struct { unsigned long pgprot; } pgprot_t;
#define pte_val(x) ((x).pte)
#define __pgd(x) ((pgd_t) { (x) } )
#define __pgprot(x) ((pgprot_t) { (x) } )
-#else
-/*
- * .. while these make it easier on the compiler
- */
-typedef unsigned long pte_t;
-typedef unsigned long pmd_t;
-typedef unsigned long pgd_t;
-typedef unsigned long pgprot_t;
-
-#define pte_val(x) (x)
-#define pmd_val(x) (x)
-#define pgd_val(x) (x)
-#define pgprot_val(x) (x)
-
-#define __pte(x) (x)
-#define __pmd(x) (x)
-#define __pgd(x) (x)
-#define __pgprot(x) (x)
-
-#endif
#endif /* !__ASSEMBLY__ */
/* to align the pointer to the (next) page boundary */
#ifndef __ASSEMBLY__
+extern int console_loglevel;
+
+/*
+ * Tell the user there is some problem. Beep too, so we can
+ * see^H^H^Hhear bugs in early bootup as well!
+ */
#define BUG() do { \
+ __asm__ __volatile__ ("movb $0x3,%al; outb %al,$0x61"); \
printk("kernel BUG at %s:%d!\n", __FILE__, __LINE__); \
+ console_loglevel = 0; \
__asm__ __volatile__(".byte 0x0f,0x0b"); \
} while (0)
--- /dev/null
+#ifndef _I386_PGTABLE_2LEVEL_H
+#define _I386_PGTABLE_2LEVEL_H
+
+/*
+ * traditional i386 two-level paging structure:
+ */
+
+#define PGDIR_SHIFT 22
+#define PTRS_PER_PGD 1024
+
+/*
+ * the i386 is two-level, so we don't really have any
+ * PMD directory physically.
+ */
+#define PMD_SHIFT 22
+#define PTRS_PER_PMD 1
+
+#define PTRS_PER_PTE 1024
+
+#define pte_ERROR(e) \
+ printk("%s:%d: bad pte %08lx.\n", __FILE__, __LINE__, pte_val(e))
+#define pmd_ERROR(e) \
+ printk("%s:%d: bad pmd %08lx.\n", __FILE__, __LINE__, pmd_val(e))
+#define pgd_ERROR(e) \
+ printk("%s:%d: bad pgd %08lx.\n", __FILE__, __LINE__, pgd_val(e))
+
+/*
+ * The "pgd_xxx()" functions here are trivial for a folded two-level
+ * setup: the pgd is never bad, and a pmd always exists (as it's folded
+ * into the pgd entry)
+ */
+extern inline int pgd_none(pgd_t pgd) { return 0; }
+extern inline int pgd_bad(pgd_t pgd) { return 0; }
+extern inline int pgd_present(pgd_t pgd) { return 1; }
+#define pgd_clear(xp) do { pgd_val(*(xp)) = 0; } while (0)
+
+#define pgd_page(pgd) \
+((unsigned long) __va(pgd_val(pgd) & PAGE_MASK))
+
+extern inline pmd_t * pmd_offset(pgd_t * dir, unsigned long address)
+{
+ return (pmd_t *) dir;
+}
+
+extern __inline__ pmd_t *get_pmd_fast(void)
+{
+ return (pmd_t *)0;
+}
+
+extern __inline__ void free_pmd_fast(pmd_t *pmd) { }
+extern __inline__ void free_pmd_slow(pmd_t *pmd) { }
+
+extern inline pmd_t * pmd_alloc(pgd_t *pgd, unsigned long address)
+{
+ if (!pgd)
+ BUG();
+ return (pmd_t *) pgd;
+}
+
+#define SWP_ENTRY(type,offset) __pte((((type) << 1) | ((offset) << 8)))
+
+#endif /* _I386_PGTABLE_2LEVEL_H */
--- /dev/null
+#ifndef _I386_PGTABLE_3LEVEL_H
+#define _I386_PGTABLE_3LEVEL_H
+
+/*
+ * Intel Physical Address Extension (PAE) Mode - three-level page
+ * tables on PPro+ CPUs.
+ *
+ * Copyright (C) 1999 Ingo Molnar <mingo@redhat.com>
+ */
+
+/*
+ * PGDIR_SHIFT determines what a top-level page table entry can map
+ */
+#define PGDIR_SHIFT 30
+#define PTRS_PER_PGD 4
+
+/*
+ * PMD_SHIFT determines the size of the area a middle-level
+ * page table can map
+ */
+#define PMD_SHIFT 21
+#define PTRS_PER_PMD 512
+
+/*
+ * entries per page directory level
+ */
+#define PTRS_PER_PTE 512
+
+#define pte_ERROR(e) \
+ printk("%s:%d: bad pte %016Lx.\n", __FILE__, __LINE__, pte_val(e))
+#define pmd_ERROR(e) \
+ printk("%s:%d: bad pmd %016Lx.\n", __FILE__, __LINE__, pmd_val(e))
+#define pgd_ERROR(e) \
+ printk("%s:%d: bad pgd %016Lx.\n", __FILE__, __LINE__, pgd_val(e))
+
+/*
+ * Subtle: in PAE mode we cannot have zeroes in the top level
+ * page directory; the CPU enforces this.
+ */
+#define pgd_none(x) (pgd_val(x) == 1ULL)
+extern inline int pgd_bad(pgd_t pgd) { return 0; }
+extern inline int pgd_present(pgd_t pgd) { return !pgd_none(pgd); }
+/*
+ * Pentium-II errata A13: in PAE mode we explicitly have to flush
+ * the TLB via cr3 if the top-level pgd is changed... This was one tough
+ * thing to find out - guess I should first read all the documentation
+ * next time around ;)
+ */
+extern inline void __pgd_clear (pgd_t * pgd)
+{
+	pgd_val(*pgd) = 1;	/* no zero allowed! */
+}
+
+extern inline void pgd_clear (pgd_t * pgd)
+{
+ __pgd_clear(pgd);
+ __flush_tlb();
+}
+
+#define pgd_page(pgd) \
+((unsigned long) __va(pgd_val(pgd) & PAGE_MASK))
+
+/* Find an entry in the second-level page table.. */
+#define pmd_offset(dir, address) ((pmd_t *) pgd_page(*(dir)) + \
+ __pmd_offset(address))
+
+extern __inline__ pmd_t *get_pmd_slow(void)
+{
+ pmd_t *ret = (pmd_t *)__get_free_page(GFP_KERNEL);
+
+ if (ret)
+ memset(ret, 0, PAGE_SIZE);
+ return ret;
+}
+
+extern __inline__ pmd_t *get_pmd_fast(void)
+{
+ unsigned long *ret;
+
+ if ((ret = pmd_quicklist) != NULL) {
+ pmd_quicklist = (unsigned long *)(*ret);
+ ret[0] = 0;
+ pgtable_cache_size--;
+ } else
+ ret = (unsigned long *)get_pmd_slow();
+ return (pmd_t *)ret;
+}
+
+extern __inline__ void free_pmd_fast(pmd_t *pmd)
+{
+ *(unsigned long *)pmd = (unsigned long) pmd_quicklist;
+ pmd_quicklist = (unsigned long *) pmd;
+ pgtable_cache_size++;
+}
+
+extern __inline__ void free_pmd_slow(pmd_t *pmd)
+{
+ free_page((unsigned long)pmd);
+}
+
+extern inline pmd_t * pmd_alloc(pgd_t *pgd, unsigned long address)
+{
+ if (!pgd)
+ BUG();
+ address = (address >> PMD_SHIFT) & (PTRS_PER_PMD - 1);
+ if (pgd_none(*pgd)) {
+ pmd_t *page = get_pmd_fast();
+
+ if (!page)
+ page = get_pmd_slow();
+ if (page) {
+ if (pgd_none(*pgd)) {
+ pgd_val(*pgd) = 1 + __pa(page);
+ __flush_tlb();
+ return page + address;
+ } else
+ free_pmd_fast(page);
+ } else
+ return NULL;
+ }
+ return (pmd_t *)pgd_page(*pgd) + address;
+}
+
+/*
+ * Subtle. offset can overflow 32 bits and that's a feature - we can do
+ * up to 16 TB swap on PAE. (Not that anyone should need that much
+ * swapspace, but who knows?)
+ */
+#define SWP_ENTRY(type,offset) __pte((((type) << 1) | ((unsigned long long)(offset) << 8)))
+
+#endif /* _I386_PGTABLE_3LEVEL_H */
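(For scale: the swap offset is a 32-bit quantity shifted into a 64-bit pte, so it can index 2^32 page-sized slots - 2^32 * 4 KB = 16 TB, which is where the figure in the comment above comes from.)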
flush_tlb_mm(mm);
}
-
#endif
#endif /* !__ASSEMBLY__ */
+#define pgd_quicklist (current_cpu_data.pgd_quick)
+#define pmd_quicklist (current_cpu_data.pmd_quick)
+#define pte_quicklist (current_cpu_data.pte_quick)
+#define pgtable_cache_size (current_cpu_data.pgtable_cache_sz)
+
+/*
+ * The Linux x86 paging architecture is 'compile-time dual-mode', it
+ * implements both the traditional 2-level x86 page tables and the
+ * newer 3-level PAE-mode page tables.
+ */
+#ifndef __ASSEMBLY__
+#if CONFIG_X86_PAE
+# include <asm/pgtable-3level.h>
+#else
+# include <asm/pgtable-2level.h>
+#endif
+#endif
-/* Certain architectures need to do special things when PTEs
+/*
+ * Certain architectures need to do special things when PTEs
* within a page table are directly modified. Thus, the following
* hook is made available.
*/
#define set_pte(pteptr, pteval) ((*(pteptr)) = (pteval))
-/* PMD_SHIFT determines the size of the area a second-level page table can map */
-#define PMD_SHIFT 22
+#define __beep() asm("movb $0x3,%al; outb %al,$0x61")
+
#define PMD_SIZE (1UL << PMD_SHIFT)
#define PMD_MASK (~(PMD_SIZE-1))
-
-/* PGDIR_SHIFT determines what a third-level page table entry can map */
-#define PGDIR_SHIFT 22
#define PGDIR_SIZE (1UL << PGDIR_SHIFT)
#define PGDIR_MASK (~(PGDIR_SIZE-1))
-/*
- * entries per page directory level: the i386 is two-level, so
- * we don't really have any PMD directory physically.
- */
-#define PTRS_PER_PTE 1024
-#define PTRS_PER_PMD 1
-#define PTRS_PER_PGD 1024
#define USER_PTRS_PER_PGD (TASK_SIZE/PGDIR_SIZE)
-/*
- * pgd entries used up by user/kernel:
- */
-
#define USER_PGD_PTRS (PAGE_OFFSET >> PGDIR_SHIFT)
#define KERNEL_PGD_PTRS (PTRS_PER_PGD-USER_PGD_PTRS)
-#define __USER_PGD_PTRS ((__PAGE_OFFSET >> PGDIR_SHIFT) & 0x3ff)
-#define __KERNEL_PGD_PTRS (PTRS_PER_PGD-__USER_PGD_PTRS)
+
+#define TWOLEVEL_PGDIR_SHIFT 22
+#define BOOT_USER_PGD_PTRS (__PAGE_OFFSET >> TWOLEVEL_PGDIR_SHIFT)
+#define BOOT_KERNEL_PGD_PTRS (1024-BOOT_USER_PGD_PTRS)
+
#ifndef __ASSEMBLY__
/* Just any arbitrary offset to the start of the vmalloc VM area: the
#define _PAGE_PCD 0x010
#define _PAGE_ACCESSED 0x020
#define _PAGE_DIRTY 0x040
-#define _PAGE_4M 0x080 /* 4 MB page, Pentium+, if present.. */
+#define _PAGE_PSE 0x080 /* 4 MB (or 2MB) page, Pentium+, if present.. */
#define _PAGE_GLOBAL 0x100 /* Global TLB entry PPro+ */
#define _PAGE_PROTNONE 0x080 /* If not present */
/* page table for 0-4MB for everybody */
extern unsigned long pg0[1024];
-/* zero page used for uninitialized stuff */
-extern unsigned long empty_zero_page[1024];
/*
- * BAD_PAGETABLE is used when we need a bogus page-table, while
- * BAD_PAGE is used for a bogus page.
- *
* ZERO_PAGE is a global shared page that is always zero: used
* for zero-mapped memory areas etc..
*/
-extern pte_t __bad_page(void);
-extern pte_t * __bad_pagetable(void);
-
-#define BAD_PAGETABLE __bad_pagetable()
-#define BAD_PAGE __bad_page()
-#define ZERO_PAGE(vaddr) ((unsigned long) empty_zero_page)
-
-/* number of bits that fit into a memory pointer */
-#define BITS_PER_PTR (8*sizeof(unsigned long))
-
-/* to align the pointer to a pointer address */
-#define PTR_MASK (~(sizeof(void*)-1))
-
-/* sizeof(void*)==1<<SIZEOF_PTR_LOG2 */
-/* 64-bit machines, beware! SRB. */
-#define SIZEOF_PTR_LOG2 2
+extern unsigned long empty_zero_page[1024];
+#define ZERO_PAGE(vaddr) (mem_map + MAP_NR(empty_zero_page))
-/* to find an entry in a page-table */
-#define PAGE_PTR(address) \
-((unsigned long)(address)>>(PAGE_SHIFT-SIZEOF_PTR_LOG2)&PTR_MASK&~PAGE_MASK)
+/*
+ * Handling allocation failures during page table setup.
+ */
+extern void __handle_bad_pmd(pmd_t * pmd);
+extern void __handle_bad_pmd_kernel(pmd_t * pmd);
#define pte_none(x) (!pte_val(x))
#define pte_present(x) (pte_val(x) & (_PAGE_PRESENT | _PAGE_PROTNONE))
#define pte_clear(xp) do { pte_val(*(xp)) = 0; } while (0)
+#define pte_pagenr(x) ((unsigned long)((pte_val(x) >> PAGE_SHIFT)))
#define pmd_none(x) (!pmd_val(x))
#define pmd_bad(x) ((pmd_val(x) & (~PAGE_MASK & ~_PAGE_USER)) != _KERNPG_TABLE)
#define pmd_clear(xp) do { pmd_val(*(xp)) = 0; } while (0)
/*
- * The "pgd_xxx()" functions here are trivial for a folded two-level
- * setup: the pgd is never bad, and a pmd always exists (as it's folded
- * into the pgd entry)
+ * Permanent address of a page. Obviously must never be
+ * called on a highmem page.
*/
-extern inline int pgd_none(pgd_t pgd) { return 0; }
-extern inline int pgd_bad(pgd_t pgd) { return 0; }
-extern inline int pgd_present(pgd_t pgd) { return 1; }
-extern inline void pgd_clear(pgd_t * pgdp) { }
+#define page_address(page) ({ if (PageHighMem(page)) BUG(); PAGE_OFFSET + (((page) - mem_map) << PAGE_SHIFT); })
+#define pages_to_mb(x) ((x) >> (20-PAGE_SHIFT))
+#define pte_page(x) (mem_map+pte_pagenr(x))
/*
* The following only work if pte_present() is true.
* Conversion functions: convert a page and protection to a page entry,
* and a page entry and page directory to the page they refer to.
*/
-#define mk_pte(page, pgprot) \
-({ pte_t __pte; pte_val(__pte) = __pa(page) + pgprot_val(pgprot); __pte; })
+
+extern inline pte_t mk_pte(struct page *page, pgprot_t pgprot)
+{
+ pte_t __pte;
+
+ pte_val(__pte) = (page-mem_map)*(unsigned long long)PAGE_SIZE +
+ pgprot_val(pgprot);
+ return __pte;
+}
/* This takes a physical page address that is used by the remapping functions */
#define mk_pte_phys(physpage, pgprot) \
extern inline pte_t pte_modify(pte_t pte, pgprot_t newprot)
{ pte_val(pte) = (pte_val(pte) & _PAGE_CHG_MASK) | pgprot_val(newprot); return pte; }
-#define pte_page(pte) \
-((unsigned long) __va(pte_val(pte) & PAGE_MASK))
+#define page_pte_prot(page,prot) mk_pte(page, prot)
+#define page_pte(page) page_pte_prot(page, __pgprot(0))
#define pmd_page(pmd) \
((unsigned long) __va(pmd_val(pmd) & PAGE_MASK))
-/* to find an entry in a page-table-directory */
-#define pgd_offset(mm, address) \
-((mm)->pgd + ((address) >> PGDIR_SHIFT))
+/* to find an entry in a page-table-directory. */
+#define __pgd_offset(address) \
+ ((address >> PGDIR_SHIFT) & (PTRS_PER_PGD-1))
+
+#define pgd_offset(mm, address) ((mm)->pgd+__pgd_offset(address))
/* to find an entry in a kernel page-table-directory */
#define pgd_offset_k(address) pgd_offset(&init_mm, address)
-/* Find an entry in the second-level page table.. */
-extern inline pmd_t * pmd_offset(pgd_t * dir, unsigned long address)
-{
- return (pmd_t *) dir;
-}
+#define __pmd_offset(address) \
+ (((address) >> PMD_SHIFT) & (PTRS_PER_PMD-1))
-/* Find an entry in the third-level page table.. */
-#define pte_offset(pmd, address) \
-((pte_t *) (pmd_page(*pmd) + ((address>>10) & ((PTRS_PER_PTE-1)<<2))))
+/* Find an entry in the third-level page table.. */
+#define __pte_offset(address) \
+ ((address >> PAGE_SHIFT) & (PTRS_PER_PTE - 1))
+#define pte_offset(dir, address) ((pte_t *) pmd_page(*(dir)) + \
+ __pte_offset(address))
/*
* Allocate and free page tables. The xxx_kernel() versions are
* if any.
*/
-#define pgd_quicklist (current_cpu_data.pgd_quick)
-#define pmd_quicklist ((unsigned long *)0)
-#define pte_quicklist (current_cpu_data.pte_quick)
-#define pgtable_cache_size (current_cpu_data.pgtable_cache_sz)
-
extern __inline__ pgd_t *get_pgd_slow(void)
{
pgd_t *ret = (pgd_t *)__get_free_page(GFP_KERNEL);
if (ret) {
+#if 0
+ /*
+	 * On PAE allocating a whole page is overkill - we will
+	 * either embed this in the mm_struct, or use a SLAB cache.
+ */
+ memcpy(ret, swapper_pg_dir, PTRS_PER_PGD * sizeof(pgd_t));
+#endif
+#if CONFIG_X86_PAE
+ int i;
+ for (i = 0; i < USER_PTRS_PER_PGD; i++)
+ __pgd_clear(ret + i);
+#else
memset(ret, 0, USER_PTRS_PER_PGD * sizeof(pgd_t));
+#endif
memcpy(ret + USER_PTRS_PER_PGD, swapper_pg_dir + USER_PTRS_PER_PGD, (PTRS_PER_PGD - USER_PTRS_PER_PGD) * sizeof(pgd_t));
}
return ret;
free_page((unsigned long)pte);
}
-/* We don't use pmd cache, so these are dummy routines */
-extern __inline__ pmd_t *get_pmd_fast(void)
-{
- return (pmd_t *)0;
-}
-
-extern __inline__ void free_pmd_fast(pmd_t *pmd)
-{
-}
-
-extern __inline__ void free_pmd_slow(pmd_t *pmd)
-{
-}
-
-extern void __bad_pte(pmd_t *pmd);
-extern void __bad_pte_kernel(pmd_t *pmd);
-
#define pte_free_kernel(pte) free_pte_slow(pte)
-#define pte_free(pte) free_pte_slow(pte)
-#define pgd_free(pgd) free_pgd_slow(pgd)
-#define pgd_alloc() get_pgd_fast()
+#define pte_free(pte) free_pte_slow(pte)
+#define pgd_free(pgd) free_pgd_slow(pgd)
+#define pgd_alloc() get_pgd_fast()
extern inline pte_t * pte_alloc_kernel(pmd_t * pmd, unsigned long address)
{
+ if (!pmd)
+ BUG();
address = (address >> PAGE_SHIFT) & (PTRS_PER_PTE - 1);
if (pmd_none(*pmd)) {
pte_t * page = (pte_t *) get_pte_fast();
return page + address;
}
if (pmd_bad(*pmd)) {
- __bad_pte_kernel(pmd);
+ __handle_bad_pmd_kernel(pmd);
return NULL;
}
return (pte_t *) pmd_page(*pmd) + address;
extern inline pte_t * pte_alloc(pmd_t * pmd, unsigned long address)
{
- address = (address >> (PAGE_SHIFT-2)) & 4*(PTRS_PER_PTE - 1);
+ address = (address >> PAGE_SHIFT) & (PTRS_PER_PTE - 1);
if (pmd_none(*pmd))
goto getnew;
if (pmd_bad(*pmd))
goto fix;
- return (pte_t *) (pmd_page(*pmd) + address);
+ return (pte_t *)pmd_page(*pmd) + address;
getnew:
{
unsigned long page = (unsigned long) get_pte_fast();
if (!page)
return get_pte_slow(pmd, address);
pmd_val(*pmd) = _PAGE_TABLE + __pa(page);
- return (pte_t *) (page + address);
+ return (pte_t *)page + address;
}
fix:
- __bad_pte(pmd);
+ __handle_bad_pmd(pmd);
return NULL;
}
/*
* allocating and freeing a pmd is trivial: the 1-entry pmd is
* inside the pgd, so has no extra memory associated with it.
+ * (In the PAE case we free the page.)
*/
-extern inline void pmd_free(pmd_t * pmd)
-{
-}
-
-extern inline pmd_t * pmd_alloc(pgd_t * pgd, unsigned long address)
-{
- return (pmd_t *) pgd;
-}
+#define pmd_free(pmd) free_pmd_slow(pmd)
#define pmd_free_kernel pmd_free
#define pmd_alloc_kernel pmd_alloc
#ifdef __SMP__
int i;
#endif
-
+
read_lock(&tasklist_lock);
for_each_task(p) {
if (!p->mm)
{
}
-#define SWP_TYPE(entry) (((entry) >> 1) & 0x3f)
-#define SWP_OFFSET(entry) ((entry) >> 8)
-#define SWP_ENTRY(type,offset) (((type) << 1) | ((offset) << 8))
+#define SWP_TYPE(entry) (((pte_val(entry)) >> 1) & 0x3f)
+#define SWP_OFFSET(entry) ((pte_val(entry)) >> 8)
#define module_map vmalloc
#define module_unmap vfree
#define io_remap_page_range remap_page_range
-#endif /* _I386_PAGE_H */
+#endif /* _I386_PGTABLE_H */
int coma_bug;
unsigned long loops_per_sec;
unsigned long *pgd_quick;
+ unsigned long *pmd_quick;
unsigned long *pte_quick;
unsigned long pgtable_cache_sz;
};
#define current_cpu_data boot_cpu_data
#endif
+#define cpu_has_pge \
+ (boot_cpu_data.x86_capability & X86_FEATURE_PGE)
+#define cpu_has_pse \
+ (boot_cpu_data.x86_capability & X86_FEATURE_PSE)
+#define cpu_has_pae \
+ (boot_cpu_data.x86_capability & X86_FEATURE_PAE)
#define cpu_has_tsc \
(cpu_data[smp_processor_id()].x86_capability & X86_FEATURE_TSC)
extern int smp_found_config;
extern void init_smp_config(void);
-extern unsigned long smp_alloc_memory(unsigned long mem_base);
+extern void init_smp_mappings(void);
+extern void smp_alloc_memory(void);
extern unsigned long cpu_present_map;
extern unsigned long cpu_online_map;
extern volatile unsigned long smp_invalidate_needed;
extern void smp_local_timer_interrupt(struct pt_regs * regs);
extern void (*mtrr_hook) (void);
extern void setup_APIC_clocks(void);
+extern void zap_low_mappings (void);
extern volatile int cpu_number_map[NR_CPUS];
extern volatile int __cpu_logical_map[NR_CPUS];
extern inline int cpu_logical_map(int cpu)
+++ /dev/null
-#ifndef _LINUX_BIGMEM_H
-#define _LINUX_BIGMEM_H
-
-#include <linux/config.h>
-
-#ifdef CONFIG_BIGMEM
-
-#include <asm/bigmem.h>
-
-/* declarations for linux/mm/bigmem.c */
-extern unsigned long bigmem_mapnr;
-extern int nr_free_bigpages;
-
-extern struct page * prepare_bigmem_swapout(struct page *);
-extern struct page * replace_with_bigmem(struct page *);
-
-#else /* CONFIG_BIGMEM */
-
-#define prepare_bigmem_swapout(page) page
-#define replace_with_bigmem(page) page
-#define kmap(kaddr, type) kaddr
-#define kunmap(vaddr, type) do { } while (0)
-#define nr_free_bigpages 0
-
-#endif /* CONFIG_BIGMEM */
-
-/* when CONFIG_BIGMEM is not set these will be plain clear/copy_page */
-extern inline void clear_bigpage(unsigned long kaddr)
-{
- unsigned long vaddr;
-
- vaddr = kmap(kaddr, KM_WRITE);
- clear_page(vaddr);
- kunmap(vaddr, KM_WRITE);
-}
-
-extern inline void copy_bigpage(unsigned long to, unsigned long from)
-{
- unsigned long vfrom, vto;
-
- vfrom = kmap(from, KM_READ);
- vto = kmap(to, KM_WRITE);
- copy_page(vto, vfrom);
- kunmap(vfrom, KM_READ);
- kunmap(vto, KM_WRITE);
-}
-
-#endif /* _LINUX_BIGMEM_H */
*/
struct linux_binprm{
char buf[128];
- unsigned long page[MAX_ARG_PAGES];
+ struct page *page[MAX_ARG_PAGES];
unsigned long p; /* current top of mem */
int sh_bang;
struct dentry * dentry;
--- /dev/null
+#ifndef _LINUX_BOOTMEM_H
+#define _LINUX_BOOTMEM_H
+
+#include <linux/config.h>
+#include <asm/pgtable.h>
+
+/*
+ * simple boot-time physical memory area allocator.
+ */
+
+extern unsigned long max_low_pfn;
+
+extern unsigned long __init init_bootmem (unsigned long addr, unsigned long memend);
+extern void __init reserve_bootmem (unsigned long addr, unsigned long size);
+extern void __init free_bootmem (unsigned long addr, unsigned long size);
+extern void * __init __alloc_bootmem (unsigned long size, unsigned long align);
+#define alloc_bootmem(x) __alloc_bootmem((x), SMP_CACHE_BYTES)
+#define alloc_bootmem_pages(x) __alloc_bootmem((x), PAGE_SIZE)
+extern unsigned long __init free_all_bootmem (void);
+
+#endif /* _LINUX_BOOTMEM_H */
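Illustration (not part of the patch): intended boot-time usage, modelled on the con_init() hunk above. The function and variable names are hypothetical; only alloc_bootmem()/alloc_bootmem_pages() come from this header.

	static struct vc_data *vc;
	static unsigned short *screenbuf;

	void __init example_con_setup(void)
	{
		/* SMP_CACHE_BYTES-aligned, runs before kmalloc() exists */
		vc = (struct vc_data *) alloc_bootmem(sizeof(struct vc_data));

		/* PAGE_SIZE-aligned variant for page-sized buffers */
		screenbuf = (unsigned short *) alloc_bootmem_pages(PAGE_SIZE);
	}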
#include <linux/quota.h>
#include <linux/mount.h>
+/*
+ * oh the beauties of C type declarations.
+ */
+struct page;
+
struct inode {
struct list_head i_hash;
struct list_head i_list;
wait_queue_head_t i_wait;
struct file_lock *i_flock;
struct vm_area_struct *i_mmap;
- struct page *i_pages;
+ struct list_head i_pages;
spinlock_t i_shared_lock;
struct dquot *i_dquot[MAXQUOTAS];
struct pipe_inode_info *i_pipe;
extern int try_to_free_buffers(struct page *);
extern void refile_buffer(struct buffer_head * buf);
-extern atomic_t buffermem;
-
#define BUF_CLEAN 0
#define BUF_LOCKED 1 /* Buffers scheduled for write */
#define BUF_DIRTY 2 /* Dirty buffers, not yet scheduled for write */
int error;
} read_descriptor_t;
-typedef int (*read_actor_t)(read_descriptor_t *, const char *, unsigned long);
+typedef int (*read_actor_t)(read_descriptor_t *, struct page *, unsigned long, unsigned long);
extern struct dentry * lookup_dentry(const char *, struct dentry *, unsigned int);
--- /dev/null
+#ifndef _LINUX_HIGHMEM_H
+#define _LINUX_HIGHMEM_H
+
+#include <linux/config.h>
+#include <asm/pgtable.h>
+
+#ifdef CONFIG_HIGHMEM
+
+extern struct page *highmem_start_page;
+
+#include <asm/highmem.h>
+
+/* declarations for linux/mm/highmem.c */
+extern unsigned long highmem_mapnr;
+extern unsigned long nr_free_highpages;
+
+extern struct page * prepare_highmem_swapout(struct page *);
+extern struct page * replace_with_highmem(struct page *);
+
+#else /* CONFIG_HIGHMEM */
+
+#define prepare_highmem_swapout(page) page
+#define replace_with_highmem(page) page
+#define kmap(page, type) page_address(page)
+#define kunmap(vaddr, type) do { } while (0)
+#define nr_free_highpages 0UL
+
+#endif /* CONFIG_HIGHMEM */
+
+/* when CONFIG_HIGHMEM is not set these will be plain clear/copy_page */
+extern inline void clear_highpage(struct page *page)
+{
+ unsigned long kaddr;
+
+ kaddr = kmap(page, KM_WRITE);
+ clear_page((void *)kaddr);
+ kunmap(kaddr, KM_WRITE);
+}
+
+extern inline void memclear_highpage(struct page *page, unsigned int offset, unsigned int size)
+{
+ unsigned long kaddr;
+
+ if (offset + size > PAGE_SIZE)
+ BUG();
+ kaddr = kmap(page, KM_WRITE);
+ memset((void *)(kaddr + offset), 0, size);
+ kunmap(kaddr, KM_WRITE);
+}
+
+/*
+ * Same but also flushes aliased cache contents to RAM.
+ */
+extern inline void memclear_highpage_flush(struct page *page, unsigned int offset, unsigned int size)
+{
+ unsigned long kaddr;
+
+ if (offset + size > PAGE_SIZE)
+ BUG();
+ kaddr = kmap(page, KM_WRITE);
+ memset((void *)(kaddr + offset), 0, size);
+ flush_page_to_ram(kaddr);
+ kunmap(kaddr, KM_WRITE);
+}
+
+extern inline void copy_highpage(struct page *to, struct page *from)
+{
+ unsigned long vfrom, vto;
+
+ vfrom = kmap(from, KM_READ);
+ vto = kmap(to, KM_WRITE);
+ copy_page((void *)vto, (void *)vfrom);
+ kunmap(vfrom, KM_READ);
+ kunmap(vto, KM_WRITE);
+}
+
+#endif /* _LINUX_HIGHMEM_H */
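Illustration (not part of the patch): memclear_highpage_flush() is the tool for zeroing the tail of a partially valid page, e.g. after a short read into a possibly-highmem page; 'valid' is a hypothetical byte count.

	/* zero everything past the first 'valid' bytes, flushing aliases */
	memclear_highpage_flush(page, valid, PAGE_SIZE - valid);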
* region, there won't necessarily be page structs defined for
* every address. */
- unsigned long * pagelist;
struct page ** maplist;
unsigned int locked : 1; /* If set, pages has been locked */
unsigned long totalswap; /* Total swap space size */
unsigned long freeswap; /* swap space still available */
unsigned short procs; /* Number of current processes */
- unsigned long totalbig; /* Total big memory size */
- unsigned long freebig; /* Available big memory size */
- char _f[20-2*sizeof(long)]; /* Padding: libc5 uses this.. */
+ unsigned long totalhigh; /* Total high memory size */
+ unsigned long freehigh; /* Available high memory size */
+ unsigned int mem_unit; /* Memory unit size in bytes */
+ char _f[20-2*sizeof(long)-sizeof(int)]; /* Padding: libc5 uses this.. */
};
#endif
#include <linux/config.h>
#include <linux/string.h>
+#include <linux/list.h>
extern unsigned long max_mapnr;
extern unsigned long num_physpages;
void (*protect)(struct vm_area_struct *area, unsigned long, size_t, unsigned int newprot);
int (*sync)(struct vm_area_struct *area, unsigned long, size_t, unsigned int flags);
void (*advise)(struct vm_area_struct *area, unsigned long, size_t, unsigned int advise);
- unsigned long (*nopage)(struct vm_area_struct * area, unsigned long address, int write_access);
- unsigned long (*wppage)(struct vm_area_struct * area, unsigned long address,
- unsigned long page);
+ struct page * (*nopage)(struct vm_area_struct * area, unsigned long address, int write_access);
+ struct page * (*wppage)(struct vm_area_struct * area, unsigned long address, struct page * page);
int (*swapout)(struct vm_area_struct *, struct page *);
};
*/
typedef struct page {
/* these must be first (free area handling) */
- struct page *next;
- struct page *prev;
+ struct list_head list;
struct inode *inode;
unsigned long offset;
struct page *next_hash;
#define PG_uptodate 3
#define PG_decr_after 5
#define PG_DMA 7
-#define PG_Slab 8
+#define PG_slab 8
#define PG_swap_cache 9
#define PG_skip 10
#define PG_swap_entry 11
-#define PG_BIGMEM 12
+#define PG_highmem 12
/* bits 21-30 unused */
#define PG_reserved 31
#define PageReferenced(page) (test_bit(PG_referenced, &(page)->flags))
#define PageDecrAfter(page) (test_bit(PG_decr_after, &(page)->flags))
#define PageDMA(page) (test_bit(PG_DMA, &(page)->flags))
-#define PageSlab(page) (test_bit(PG_Slab, &(page)->flags))
+#define PageSlab(page) (test_bit(PG_slab, &(page)->flags))
#define PageSwapCache(page) (test_bit(PG_swap_cache, &(page)->flags))
#define PageReserved(page) (test_bit(PG_reserved, &(page)->flags))
-#define PageSetSlab(page) (set_bit(PG_Slab, &(page)->flags))
+#define PageSetSlab(page) (set_bit(PG_slab, &(page)->flags))
#define PageSetSwapCache(page) (set_bit(PG_swap_cache, &(page)->flags))
#define PageTestandSetSwapCache(page) \
(test_and_set_bit(PG_swap_cache, &(page)->flags))
-#define PageClearSlab(page) (clear_bit(PG_Slab, &(page)->flags))
+#define PageClearSlab(page) (clear_bit(PG_slab, &(page)->flags))
#define PageClearSwapCache(page)(clear_bit(PG_swap_cache, &(page)->flags))
#define PageTestandClearSwapCache(page) \
(test_and_clear_bit(PG_swap_cache, &(page)->flags))
-#ifdef CONFIG_BIGMEM
-#define PageBIGMEM(page) (test_bit(PG_BIGMEM, &(page)->flags))
+#ifdef CONFIG_HIGHMEM
+#define PageHighMem(page) (test_bit(PG_highmem, &(page)->flags))
#else
-#define PageBIGMEM(page) 0 /* needed to optimize away at compile time */
+#define PageHighMem(page) 0 /* needed to optimize away at compile time */
#endif
+#define SetPageReserved(page) do { set_bit(PG_reserved, &(page)->flags); \
+ } while (0)
+#define ClearPageReserved(page) do { test_and_clear_bit(PG_reserved, &(page)->flags); } while (0)
+
+
/*
* Various page->flags bits:
*
* (e.g. a private data page of one process).
*
* A page may be used for kmalloc() or anyone else who does a
- * get_free_page(). In this case the page->count is at least 1, and
+ * __get_free_page(). In this case the page->count is at least 1, and
* all other fields are unused but should be 0 or NULL. The
* management of this page is the responsibility of the one who uses
* it.
* goes to clearing the page. If you want a page without the clearing
* overhead, just use __get_free_page() directly..
*/
+extern struct page * __get_pages(int gfp_mask, unsigned long order);
#define __get_free_page(gfp_mask) __get_free_pages((gfp_mask),0)
#define __get_dma_pages(gfp_mask, order) __get_free_pages((gfp_mask) | GFP_DMA,(order))
extern unsigned long FASTCALL(__get_free_pages(int gfp_mask, unsigned long gfp_order));
+extern struct page * get_free_highpage(int gfp_mask);
-extern inline unsigned long get_free_page(int gfp_mask)
+extern inline unsigned long get_zeroed_page(int gfp_mask)
{
unsigned long page;
page = __get_free_page(gfp_mask);
if (page)
- clear_page(page);
+ clear_page((void *)page);
return page;
}
+/*
+ * The old interface name will be removed in 2.5:
+ */
+#define get_free_page get_zeroed_page
+
/* memory.c & swap.c*/
#define free_page(addr) free_pages((addr),0)
extern int FASTCALL(__free_page(struct page *));
extern void show_free_areas(void);
-extern unsigned long put_dirty_page(struct task_struct * tsk,unsigned long page,
+extern struct page * put_dirty_page(struct task_struct * tsk, struct page *page,
unsigned long address);
extern void clear_page_tables(struct mm_struct *, unsigned long, int);
extern int pgt_cache_water[2];
extern int check_pgt_cache(void);
-extern unsigned long paging_init(unsigned long start_mem, unsigned long end_mem);
-extern void mem_init(unsigned long start_mem, unsigned long end_mem);
+extern void paging_init(void);
+extern void free_area_init(unsigned long);
+extern void mem_init(void);
extern void show_mem(void);
extern void oom(struct task_struct * tsk);
extern void si_meminfo(struct sysinfo * val);
-extern void swapin_readahead(unsigned long);
+extern void swapin_readahead(pte_t);
/* mmap.c */
extern void vma_init(void);
#define __GFP_HIGH 0x08
#define __GFP_IO 0x10
#define __GFP_SWAP 0x20
-#ifdef CONFIG_BIGMEM
-#define __GFP_BIGMEM 0x40
+#ifdef CONFIG_HIGHMEM
+#define __GFP_HIGHMEM 0x40
#else
-#define __GFP_BIGMEM 0x0 /* noop */
+#define __GFP_HIGHMEM 0x0 /* noop */
#endif
#define __GFP_DMA 0x80
#define GFP_BUFFER (__GFP_LOW | __GFP_WAIT)
#define GFP_ATOMIC (__GFP_HIGH)
-#define GFP_BIGUSER (__GFP_LOW | __GFP_WAIT | __GFP_IO | __GFP_BIGMEM)
#define GFP_USER (__GFP_LOW | __GFP_WAIT | __GFP_IO)
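+/* user pages may sit in high memory, which the kernel cannot address
+   directly: */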
+#define GFP_HIGHUSER (GFP_USER | __GFP_HIGHMEM)
#define GFP_KERNEL (__GFP_MED | __GFP_WAIT | __GFP_IO)
#define GFP_NFS (__GFP_HIGH | __GFP_WAIT | __GFP_IO)
#define GFP_KSWAPD (__GFP_IO | __GFP_SWAP)
#define GFP_DMA __GFP_DMA
-/* Flag - indicates that the buffer can be taken from big memory which is not
+/* Flag - indicates that the buffer can be taken from high memory which is not
directly addressable by the kernel */
-#define GFP_BIGMEM __GFP_BIGMEM
+#define GFP_HIGHMEM __GFP_HIGHMEM
/* vma is the first one with address < vma->vm_end,
* and even address < vma->vm_start. Have to extend vma. */
extern struct vm_area_struct *find_extend_vma(struct task_struct *tsk, unsigned long addr);
-#define buffer_under_min() ((atomic_read(&buffermem) >> PAGE_SHIFT) * 100 < \
+#define buffer_under_min() (atomic_read(&buffermem_pages) * 100 < \
buffer_mem.min_percent * num_physpages)
#define pgcache_under_min() (atomic_read(&page_cache_size) * 100 < \
page_cache.min_percent * num_physpages)
#include <linux/mm.h>
#include <linux/fs.h>
+#include <linux/highmem.h>
+#include <linux/list.h>
-static inline unsigned long page_address(struct page * page)
+extern inline pte_t get_pagecache_pte(struct page *page)
{
- return PAGE_OFFSET + ((page - mem_map) << PAGE_SHIFT);
+ /*
+ * pagecache offsets (page->offset) are still machine-word sized.
+ * The rest of the VM can deal with arbitrarily sized ptes.
+ */
+ return __pte(page->offset);
}
/*
#define PAGE_CACHE_MASK PAGE_MASK
#define PAGE_CACHE_ALIGN(addr) (((addr)+PAGE_CACHE_SIZE-1)&PAGE_CACHE_MASK)
-#define page_cache_alloc() __get_free_page(GFP_USER)
-#define page_cache_free(x) free_page(x)
+#define page_cache_alloc() __get_pages(GFP_USER, 0)
+#define page_cache_free(x) __free_page(x)
#define page_cache_release(x) __free_page(x)
/*
* inode pointer and offsets are distributed (ie, we
* roughly know which bits are "significant")
*/
-static inline unsigned long _page_hashfn(struct inode * inode, unsigned long offset)
+extern inline unsigned long _page_hashfn(struct inode * inode, unsigned long offset)
{
#define i (((unsigned long) inode)/(sizeof(struct inode) & ~ (sizeof(struct inode) - 1)))
#define o (offset >> PAGE_SHIFT)
extern void add_to_page_cache(struct page * page, struct inode * inode, unsigned long offset);
extern int add_to_page_cache_unique(struct page * page, struct inode * inode, unsigned long offset, struct page **hash);
-static inline void add_page_to_hash_queue(struct page * page, struct inode * inode, unsigned long offset)
+extern inline void add_page_to_hash_queue(struct page * page, struct inode * inode, unsigned long offset)
{
__add_page_to_hash_queue(page, page_hash(inode,offset));
}
-static inline void add_page_to_inode_queue(struct inode * inode, struct page * page)
+extern inline void add_page_to_inode_queue(struct inode * inode, struct page * page)
{
- struct page **p = &inode->i_pages;
-
- inode->i_nrpages++;
+ struct list_head *head = &inode->i_pages;
+
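+ /* i_nrpages and the list state must agree: both empty or both
+    populated - catch corruption early */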
+ if (!inode->i_nrpages++) {
+ if (!list_empty(head))
+ BUG();
+ } else {
+ if (list_empty(head))
+ BUG();
+ }
+ list_add(&page->list, head);
page->inode = inode;
- page->prev = NULL;
- if ((page->next = *p) != NULL)
- page->next->prev = page;
- *p = page;
+}
+
+extern inline void remove_page_from_inode_queue(struct page * page)
+{
+ struct inode * inode = page->inode;
+
+ inode->i_nrpages--;
+ list_del(&page->list);
}
extern void ___wait_on_page(struct page *);
-static inline void wait_on_page(struct page * page)
+extern inline void wait_on_page(struct page * page)
{
if (PageLocked(page))
___wait_on_page(page);
/* files */ &init_files, \
/* mm */ NULL, &init_mm, \
/* signals */ SPIN_LOCK_UNLOCKED, &init_signals, {{0}}, {{0}}, NULL, &init_task.sigqueue, 0, 0, \
-/* exec cts */ 0,0,0, \
+/* exec cts */ 0,0, \
}
#ifndef INIT_TASK_SIZE
struct shmid_ds u;
/* the following are private */
unsigned long shm_npages; /* size of segment (pages) */
- unsigned long *shm_pages; /* array of ptrs to frames -> SHMMAX */
+ pte_t *shm_pages; /* array of ptrs to frames -> SHMMAX */
struct vm_area_struct *attaches; /* descriptors for attaches */
};
asmlinkage long sys_shmat (int shmid, char *shmaddr, int shmflg, unsigned long *addr);
asmlinkage long sys_shmdt (char *shmaddr);
asmlinkage long sys_shmctl (int shmid, int cmd, struct shmid_ds *buf);
-extern void shm_unuse(unsigned long entry, unsigned long page);
+extern void shm_unuse(pte_t entry, struct page *page);
#endif /* __KERNEL__ */
#define SLAB_CTOR_VERIFY 0x004UL /* tell constructor it's a verify call */
/* prototypes */
-extern long kmem_cache_init(long, long);
+extern void kmem_cache_init(void);
extern void kmem_cache_sizes_init(void);
extern kmem_cache_t *kmem_find_general_cachep(size_t);
extern kmem_cache_t *kmem_cache_create(const char *, size_t, size_t, unsigned long,
#define MAX_SWAP_BADPAGES \
((__swapoffset(magic.magic) - __swapoffset(info.badpages)) / sizeof(int))
-#undef DEBUG_SWAP
-
#include <asm/atomic.h>
#define SWP_USED 1
extern atomic_t nr_async_pages;
extern struct inode swapper_inode;
extern atomic_t page_cache_size;
-extern atomic_t buffermem;
+extern atomic_t buffermem_pages;
/* Incomplete types for prototype declarations: */
struct task_struct;
/* linux/mm/page_io.c */
extern void rw_swap_page(int, struct page *, int);
-extern void rw_swap_page_nolock(int, unsigned long, char *, int);
-extern void swap_after_unlock_page (unsigned long entry);
+extern void rw_swap_page_nolock(int, pte_t, char *, int);
/* linux/mm/page_alloc.c */
/* linux/mm/swap_state.c */
extern void show_swap_cache_info(void);
-extern void add_to_swap_cache(struct page *, unsigned long);
-extern int swap_duplicate(unsigned long);
+extern void add_to_swap_cache(struct page *, pte_t);
+extern int swap_duplicate(pte_t);
extern int swap_check_entry(unsigned long);
-struct page * lookup_swap_cache(unsigned long);
-extern struct page * read_swap_cache_async(unsigned long, int);
+struct page * lookup_swap_cache(pte_t);
+extern struct page * read_swap_cache_async(pte_t, int);
#define read_swap_cache(entry) read_swap_cache_async(entry, 1);
-extern int FASTCALL(swap_count(unsigned long));
-extern unsigned long acquire_swap_entry(struct page *page);
+extern int swap_count(struct page *);
+extern pte_t acquire_swap_entry(struct page *page);
/*
* Make these inline later once they are working properly.
*/
extern void __delete_from_swap_cache(struct page *page);
extern void delete_from_swap_cache(struct page *page);
-extern void free_page_and_swap_cache(unsigned long addr);
+extern void free_page_and_swap_cache(struct page *page);
/* linux/mm/swapfile.c */
extern unsigned int nr_swapfiles;
extern struct swap_info_struct swap_info[];
extern int is_swap_partition(kdev_t);
void si_swapinfo(struct sysinfo *);
-unsigned long get_swap_page(void);
-extern void FASTCALL(swap_free(unsigned long));
+pte_t get_swap_page(void);
+extern void swap_free(pte_t);
struct swap_list_t {
int head; /* head of priority-ordered swapfile list */
int next; /* swapfile to be used next */
return 1;
count = page_count(page);
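+ /* the swap cache itself holds one page reference and one swap-map
+    reference - the "- 2" subtracts those */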
if (PageSwapCache(page))
- count += swap_count(page->offset) - 2;
+ count += swap_count(page) - 2;
return count > 1;
}
extern int kmsg_redirect;
-extern unsigned long con_init(unsigned long);
+extern void con_init(void);
+extern void console_init(void);
extern int rs_init(void);
extern int lp_init(void);
extern int pty_init(void);
-extern int tty_init(void);
+extern void tty_init(void);
extern int ip2_init(void);
extern int pcxe_init(void);
extern int pc_init(void);
/* serial.c */
-extern long serial_console_init(long kmem_start, long kmem_end);
+extern void serial_console_init(void);
/* pcxx.c */
#include <linux/blk.h>
#include <linux/hdreg.h>
#include <linux/iobuf.h>
+#include <linux/bootmem.h>
#include <asm/io.h>
#include <asm/bugs.h>
extern void init_IRQ(void);
extern void init_modules(void);
-extern long console_init(long, long);
extern void sock_init(void);
extern void fork_init(unsigned long);
extern void mca_init(void);
extern void time_init(void);
-static unsigned long memory_start = 0;
-static unsigned long memory_end = 0;
-
int rows, cols;
#ifdef CONFIG_BLK_DEV_INITRD
}
-extern void setup_arch(char **, unsigned long *, unsigned long *);
+extern void setup_arch(char **);
extern void cpu_idle(void);
#ifndef __SMP__
asmlinkage void __init start_kernel(void)
{
char * command_line;
-
+ unsigned long mempages;
/*
* Interrupts are still disabled. Do necessary setups, then
* enable them
*/
lock_kernel();
printk(linux_banner);
- setup_arch(&command_line, &memory_start, &memory_end);
- memory_start = paging_init(memory_start,memory_end);
+ setup_arch(&command_line);
+ paging_init();
trap_init();
init_IRQ();
sched_init();
* we've done PCI setups etc, and console_init() must be aware of
* this. But we do want output early, in case something goes wrong.
*/
- memory_start = console_init(memory_start,memory_end);
+ console_init();
#ifdef CONFIG_MODULES
init_modules();
#endif
if (prof_shift) {
- prof_buffer = (unsigned int *) memory_start;
+ unsigned int size;
/* only text is profiled */
prof_len = (unsigned long) &_etext - (unsigned long) &_stext;
prof_len >>= prof_shift;
- memory_start += prof_len * sizeof(unsigned int);
- memset(prof_buffer, 0, prof_len * sizeof(unsigned int));
+
+ size = prof_len * sizeof(unsigned int) + PAGE_SIZE-1;
+ prof_buffer = (unsigned int *) alloc_bootmem(size);
+ memset(prof_buffer, 0, size);
}
- memory_start = kmem_cache_init(memory_start, memory_end);
+ kmem_cache_init();
sti();
calibrate_delay();
#ifdef CONFIG_BLK_DEV_INITRD
+ /* FIXME: use the bootmem.h interface - 'memory_start' no longer exists. */
if (initrd_start && !initrd_below_start_ok && initrd_start < memory_start) {
printk(KERN_CRIT "initrd overwritten (0x%08lx < 0x%08lx) - "
"disabling it.\n",initrd_start,memory_start);
initrd_start = 0;
}
#endif
- mem_init(memory_start,memory_end);
+ mem_init();
kmem_cache_sizes_init();
#ifdef CONFIG_PROC_FS
proc_root_init();
#endif
- fork_init(memory_end-memory_start);
+ mempages = num_physpages;
+
+ fork_init(mempages);
filescache_init();
dcache_init();
vma_init();
- buffer_init(memory_end-memory_start);
- page_cache_init(memory_end-memory_start);
+ buffer_init(mempages);
+ page_cache_init(mempages);
kiobuf_init();
signals_init();
inode_init();
/*
* linux/ipc/shm.c
* Copyright (C) 1992, 1993 Krishna Balasubramanian
- * Many improvements/fixes by Bruno Haible.
+ * Many improvements/fixes by Bruno Haible.
* Replaced `struct shm_desc' by `struct vm_area_struct', July 1994.
* Fixed the shm swap deallocation (shm_unuse()), August 1998 Andrea Arcangeli.
*
* /proc/sysvipc/shm support (c) 1999 Dragos Acostachioaie <dragos@iname.com>
* BIGMEM support, Andrea Arcangeli <andrea@suse.de>
* SMP thread shm, Jean-Luc Boyard <jean-luc.boyard@siemens.fr>
+ * HIGHMEM support, Ingo Molnar <mingo@redhat.com>
*/
#include <linux/config.h>
#include <linux/vmalloc.h>
#include <linux/pagemap.h>
#include <linux/proc_fs.h>
-#include <linux/bigmem.h>
+#include <linux/highmem.h>
#include <asm/uaccess.h>
#include <asm/pgtable.h>
extern int ipcperms (struct ipc_perm *ipcp, short shmflg);
-extern unsigned long get_swap_page (void);
static int findkey (key_t key);
static int newseg (key_t key, int shmflg, int size);
static int shm_map (struct vm_area_struct *shmd);
static void killseg (int id);
static void shm_open (struct vm_area_struct *shmd);
static void shm_close (struct vm_area_struct *shmd);
-static unsigned long shm_nopage(struct vm_area_struct *, unsigned long, int);
+static struct page * shm_nopage(struct vm_area_struct *, unsigned long, int);
static int shm_swapout(struct vm_area_struct *, struct page *);
#ifdef CONFIG_PROC_FS
static int sysvipc_shm_read_proc(char *buffer, char **start, off_t offset, int length, int *eof, void *data);
{
struct shmid_kernel *shp;
int numpages = (size + PAGE_SIZE -1) >> PAGE_SHIFT;
- int id, i;
+ int id;
if (size < SHMMIN)
return -EINVAL;
return -ENOMEM;
}
lock_kernel();
- shp->shm_pages = (ulong *) vmalloc (numpages*sizeof(ulong));
+ shp->shm_pages = (pte_t *) vmalloc (numpages*sizeof(pte_t));
unlock_kernel();
if (!shp->shm_pages) {
kfree(shp);
return -ENOMEM;
}
- for (i = 0; i < numpages; shp->shm_pages[i++] = 0);
+ memset(shp->shm_pages, 0, numpages*sizeof(pte_t));
+
shp->u.shm_perm.key = key;
shp->u.shm_perm.mode = (shmflg & S_IRWXUGO);
shp->u.shm_perm.cuid = shp->u.shm_perm.uid = current->euid;
int rss, swp;
shp = shm_segs[id];
- if (shp == IPC_NOID || shp == IPC_UNUSED) {
- printk ("shm nono: killseg called on unused seg id=%d\n", id);
- return;
- }
+ if (shp == IPC_NOID || shp == IPC_UNUSED)
+ BUG();
shp->u.shm_perm.seq++; /* for shmat */
shm_seq = (shm_seq+1) % ((unsigned)(1<<31)/SHMMNI); /* increment, but avoid overflow */
shm_segs[id] = (struct shmid_kernel *) IPC_UNUSED;
used_segs--;
if (id == max_shmid)
while (max_shmid && (shm_segs[--max_shmid] == IPC_UNUSED));
- if (!shp->shm_pages) {
- printk ("shm nono: killseg shp->pages=NULL. id=%d\n", id);
- return;
- }
+ if (!shp->shm_pages)
+ BUG();
spin_unlock(&shm_lock);
numpages = shp->shm_npages;
for (i = 0, rss = 0, swp = 0; i < numpages ; i++) {
pte_t pte;
- pte = __pte(shp->shm_pages[i]);
+ pte = shp->shm_pages[i];
if (pte_none(pte))
continue;
if (pte_present(pte)) {
- free_page (pte_page(pte));
+ __free_page (pte_page(pte));
rss++;
} else {
lock_kernel();
- swap_free(pte_val(pte));
+ swap_free(pte);
unlock_kernel();
swp++;
}
down(&current->mm->mmap_sem);
spin_lock(&shm_lock);
- if (shmid < 0) {
- /* printk("shmat() -> EINVAL because shmid = %d < 0\n",shmid); */
+ if (shmid < 0)
goto out;
- }
shp = shm_segs[id = (unsigned int) shmid % SHMMNI];
- if (shp == IPC_UNUSED || shp == IPC_NOID) {
- /* printk("shmat() -> EINVAL because shmid = %d is invalid\n",shmid); */
+ if (shp == IPC_UNUSED || shp == IPC_NOID)
goto out;
- }
if (!(addr = (ulong) shmaddr)) {
if (shmflg & SHM_REMAP)
*/
if (addr < current->mm->start_stack &&
addr > current->mm->start_stack - PAGE_SIZE*(shp->shm_npages + 4))
- {
- /* printk("shmat() -> EINVAL because segment intersects stack\n"); */
goto out;
- }
- if (!(shmflg & SHM_REMAP))
- if ((shmd = find_vma_intersection(current->mm, addr, addr + shp->u.shm_segsz))) {
- /* printk("shmat() -> EINVAL because the interval [0x%lx,0x%lx) intersects an already mapped interval [0x%lx,0x%lx).\n",
- addr, addr + shp->shm_segsz, shmd->vm_start, shmd->vm_end); */
- goto out;
- }
+ if (!(shmflg & SHM_REMAP) && find_vma_intersection(current->mm, addr, addr + shp->u.shm_segsz))
+ goto out;
err = -EACCES;
if (ipcperms(&shp->u.shm_perm, shmflg & SHM_RDONLY ? S_IRUGO : S_IRUGO|S_IWUGO))
shmd->vm_offset = 0;
shmd->vm_ops = &shm_vm_ops;
- shp->u.shm_nattch++; /* prevent destruction */
+ shp->u.shm_nattch++; /* prevent destruction */
spin_unlock(&shm_lock);
err = shm_map (shmd);
spin_lock(&shm_lock);
/*
* page not present ... go through shm_pages
*/
-static unsigned long shm_nopage(struct vm_area_struct * shmd, unsigned long address, int no_share)
+static struct page * shm_nopage(struct vm_area_struct * shmd, unsigned long address, int no_share)
{
pte_t pte;
struct shmid_kernel *shp;
unsigned int idx;
- unsigned long page;
- struct page * page_map;
+ struct page * page;
shp = *(struct shmid_kernel **) shmd->vm_private_data;
idx = (address - shmd->vm_start + shmd->vm_offset) >> PAGE_SHIFT;
-#ifdef DEBUG_SHM
- if (shp == IPC_UNUSED || shp == IPC_NOID) {
- printk ("shm_nopage: id=%d invalid. Race.\n", id);
- return 0;
- }
- if (idx >= shp->shm_npages) {
- printk ("shm_nopage : too large page index. id=%d\n", id);
- return 0;
- }
-#endif
-
spin_lock(&shm_lock);
- again:
- pte = __pte(shp->shm_pages[idx]);
+again:
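+ /*
+  * shm_lock is dropped below while a fresh page is allocated or an
+  * existing one is swapped in; afterwards the pte is re-checked and
+  * we restart here if it changed under us.
+  */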
+ pte = shp->shm_pages[idx];
if (!pte_present(pte)) {
if (pte_none(pte)) {
spin_unlock(&shm_lock);
- page = __get_free_page(GFP_BIGUSER);
+ page = get_free_highpage(GFP_HIGHUSER);
if (!page)
goto oom;
- clear_bigpage(page);
+ clear_highpage(page);
spin_lock(&shm_lock);
- if (pte_val(pte) != shp->shm_pages[idx])
+ if (pte_val(pte) != pte_val(shp->shm_pages[idx]))
goto changed;
} else {
- unsigned long entry = pte_val(pte);
+ pte_t entry = pte;
spin_unlock(&shm_lock);
- page_map = lookup_swap_cache(entry);
- if (!page_map) {
+ BUG();
+ page = lookup_swap_cache(entry);
+ if (!page) {
lock_kernel();
swapin_readahead(entry);
- page_map = read_swap_cache(entry);
+ page = read_swap_cache(entry);
unlock_kernel();
- if (!page_map)
+ if (!page)
goto oom;
}
- delete_from_swap_cache(page_map);
- page_map = replace_with_bigmem(page_map);
- page = page_address(page_map);
+ delete_from_swap_cache(page);
+ page = replace_with_highmem(page);
lock_kernel();
swap_free(entry);
unlock_kernel();
spin_lock(&shm_lock);
shm_swp--;
- pte = __pte(shp->shm_pages[idx]);
+ pte = shp->shm_pages[idx];
if (pte_present(pte))
goto present;
}
shm_rss++;
pte = pte_mkdirty(mk_pte(page, PAGE_SHARED));
- shp->shm_pages[idx] = pte_val(pte);
+ shp->shm_pages[idx] = pte;
} else
--current->maj_flt; /* was incremented in do_no_page */
-done: /* pte_val(pte) == shp->shm_pages[idx] */
- get_page(mem_map + MAP_NR(pte_page(pte)));
+done:
+ /* pte_val(pte) == shp->shm_pages[idx] */
+ get_page(pte_page(pte));
spin_unlock(&shm_lock);
current->min_flt++;
return pte_page(pte);
changed:
- free_page(page);
+ __free_page(page);
goto again;
present:
- free_page(page);
+ if (page)
+ free_page_and_swap_cache(page);
goto done;
oom:
- return -1;
+ return (struct page *)(-1);
}
/*
{
pte_t page;
struct shmid_kernel *shp;
- unsigned long swap_nr;
+ pte_t swap_entry;
unsigned long id, idx;
int loop = 0;
int counter;
counter = shm_rss >> prio;
lock_kernel();
- if (!counter || !(swap_nr = get_swap_page())) {
+ if (!counter || !pte_val(swap_entry = get_swap_page())) {
unlock_kernel();
return 0;
}
if (idx >= shp->shm_npages)
goto next_id;
- page = __pte(shp->shm_pages[idx]);
+ page = shp->shm_pages[idx];
if (!pte_present(page))
goto check_table;
- page_map = &mem_map[MAP_NR(pte_page(page))];
+ page_map = pte_page(page);
if ((gfp_mask & __GFP_DMA) && !PageDMA(page_map))
goto check_table;
- if (!(gfp_mask & __GFP_BIGMEM) && PageBIGMEM(page_map))
+ if (!(gfp_mask & __GFP_HIGHMEM) && PageHighMem(page_map))
goto check_table;
swap_attempts++;
if (--counter < 0) { /* failed */
- failed:
+failed:
spin_unlock(&shm_lock);
lock_kernel();
- swap_free (swap_nr);
+ swap_free(swap_entry);
unlock_kernel();
return 0;
}
- if (page_count(mem_map + MAP_NR(pte_page(page))) != 1)
+ if (page_count(page_map) != 1)
goto check_table;
- if (!(page_map = prepare_bigmem_swapout(page_map)))
+ if (!(page_map = prepare_highmem_swapout(page_map)))
goto check_table;
- shp->shm_pages[idx] = swap_nr;
+ shp->shm_pages[idx] = swap_entry;
swap_successes++;
shm_swp++;
shm_rss--;
spin_unlock(&shm_lock);
+
lock_kernel();
- swap_duplicate(swap_nr);
- add_to_swap_cache(page_map, swap_nr);
+ swap_duplicate(swap_entry);
+ add_to_swap_cache(page_map, swap_entry);
rw_swap_page(WRITE, page_map, 0);
unlock_kernel();
* Free the swap entry and set the new pte for the shm page.
*/
static void shm_unuse_page(struct shmid_kernel *shp, unsigned long idx,
- unsigned long page, unsigned long entry)
+ pte_t entry, struct page *page)
{
pte_t pte;
pte = pte_mkdirty(mk_pte(page, PAGE_SHARED));
- shp->shm_pages[idx] = pte_val(pte);
- get_page(mem_map + MAP_NR(page));
+ shp->shm_pages[idx] = pte;
+ get_page(page);
shm_rss++;
shm_swp--;
/*
* unuse_shm() search for an eventually swapped out shm page.
*/
-void shm_unuse(unsigned long entry, unsigned long page)
+void shm_unuse(pte_t entry, struct page *page)
{
int i, n;
spin_lock(&shm_lock);
- for (i = 0; i < SHMMNI; i++)
- if (shm_segs[i] != IPC_UNUSED && shm_segs[i] != IPC_NOID)
- for (n = 0; n < shm_segs[i]->shm_npages; n++)
- if (shm_segs[i]->shm_pages[n] == entry)
- {
- shm_unuse_page(shm_segs[i], n,
- page, entry);
- return;
- }
+ for (i = 0; i < SHMMNI; i++) {
+ struct shmid_kernel *seg = shm_segs[i];
+ if ((seg == IPC_UNUSED) || (seg == IPC_NOID))
+ continue;
+ for (n = 0; n < seg->shm_npages; n++)
+ if (pte_val(seg->shm_pages[n]) == pte_val(entry)) {
+ shm_unuse_page(seg, n, entry, page);
+ return;
+ }
+ }
spin_unlock(&shm_lock);
}
return 0;
}
-void __init fork_init(unsigned long memsize)
+void __init fork_init(unsigned long mempages)
{
int i;
* value: the thread structures can take up at most half
* of memory.
*/
- max_threads = memsize / THREAD_SIZE / 2;
+ max_threads = mempages / (THREAD_SIZE/PAGE_SIZE) / 2;
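+ /*
+  * E.g. with 4K pages and a two-page THREAD_SIZE, a 64MB box
+  * (16384 pages) gets at most 16384/2/2 = 4096 threads.
+  */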
init_task.rlim[RLIMIT_NPROC].rlim_cur = max_threads/2;
init_task.rlim[RLIMIT_NPROC].rlim_max = max_threads/2;
#include <asm/uaccess.h>
-#define LOG_BUF_LEN (16384)
+#define LOG_BUF_LEN (16384*16)
#define LOG_BUF_MASK (LOG_BUF_LEN-1)
static char buf[1024];
#include <linux/sched.h>
#include <linux/errno.h>
#include <linux/mm.h>
-#include <linux/bigmem.h>
+#include <linux/highmem.h>
#include <asm/pgtable.h>
#include <asm/uaccess.h>
pgd_t * pgdir;
pmd_t * pgmiddle;
pte_t * pgtable;
- unsigned long page;
+ unsigned long mapnr;
+ unsigned long maddr;
+ struct page *page;
repeat:
pgdir = pgd_offset(vma->vm_mm, addr);
pgtable = pte_offset(pgmiddle, addr);
if (!pte_present(*pgtable))
goto fault_in_page;
- page = pte_page(*pgtable);
+ mapnr = pte_pagenr(*pgtable);
if (write && (!pte_write(*pgtable) || !pte_dirty(*pgtable)))
goto fault_in_page;
- if (MAP_NR(page) >= max_mapnr)
+ if (mapnr >= max_mapnr)
return 0;
+ page = mem_map + mapnr;
flush_cache_page(vma, addr);
- {
- void *src = (void *) (page + (addr & ~PAGE_MASK));
- void *dst = buf;
- if (write) {
- dst = src;
- src = buf;
- }
- src = (void *) kmap((unsigned long) src, KM_READ);
- dst = (void *) kmap((unsigned long) dst, KM_WRITE);
- memcpy(dst, src, len);
- kunmap((unsigned long) src, KM_READ);
- kunmap((unsigned long) dst, KM_WRITE);
+ if (write) {
+ maddr = kmap(page, KM_WRITE);
+ memcpy((char *)maddr + (addr & ~PAGE_MASK), buf, len);
+ flush_page_to_ram(maddr);
+ kunmap(maddr, KM_WRITE);
+ } else {
+ maddr = kmap(page, KM_READ);
+ memcpy(buf, (char *)maddr + (addr & ~PAGE_MASK), len);
+ flush_page_to_ram(maddr);
+ kunmap(maddr, KM_READ);
}
- flush_page_to_ram(page);
return len;
fault_in_page:
return 0;
bad_pgd:
- printk("ptrace: bad pgd in '%s' at %08lx (%08lx)\n", tsk->comm, addr, pgd_val(*pgdir));
+ pgd_ERROR(*pgdir);
return 0;
bad_pmd:
- printk("ptrace: bad pmd in '%s' at %08lx (%08lx)\n", tsk->comm, addr, pmd_val(*pgmiddle));
+ pmd_ERROR(*pgmiddle);
return 0;
}
O_TARGET := mm.o
O_OBJS := memory.o mmap.o filemap.o mprotect.o mlock.o mremap.o \
- vmalloc.o slab.o \
- swap.o vmscan.o page_io.o page_alloc.o swap_state.o swapfile.o
+ vmalloc.o slab.o bootmem.o swap.o vmscan.o page_io.o \
+ page_alloc.o swap_state.o swapfile.o
-ifeq ($(CONFIG_BIGMEM),y)
-O_OBJS += bigmem.o
+ifeq ($(CONFIG_HIGHMEM),y)
+O_OBJS += highmem.o
endif
include $(TOPDIR)/Rules.make
+++ /dev/null
-/*
- * BIGMEM common code and variables.
- *
- * (C) 1999 Andrea Arcangeli, SuSE GmbH, andrea@suse.de
- * Gerhard Wichert, Siemens AG, Gerhard.Wichert@pdb.siemens.de
- */
-
-#include <linux/mm.h>
-#include <linux/pagemap.h>
-#include <linux/bigmem.h>
-
-unsigned long bigmem_mapnr;
-int nr_free_bigpages = 0;
-
-struct page * prepare_bigmem_swapout(struct page * page)
-{
- /* if this is a bigmem page so it can't be swapped out directly
- otherwise the b_data buffer addresses will break
- the lowlevel device drivers. */
- if (PageBIGMEM(page)) {
- unsigned long regular_page;
- unsigned long vaddr;
-
- regular_page = __get_free_page(GFP_ATOMIC);
- if (!regular_page)
- return NULL;
-
- vaddr = kmap(page_address(page), KM_READ);
- copy_page(regular_page, vaddr);
- kunmap(vaddr, KM_READ);
-
- /* ok, we can just forget about our bigmem page since
- we stored its data into the new regular_page. */
- __free_page(page);
-
- page = MAP_NR(regular_page) + mem_map;
- }
- return page;
-}
-
-struct page * replace_with_bigmem(struct page * page)
-{
- if (!PageBIGMEM(page) && nr_free_bigpages) {
- unsigned long kaddr;
-
- kaddr = __get_free_page(GFP_ATOMIC|GFP_BIGMEM);
- if (kaddr) {
- struct page * bigmem_page;
-
- bigmem_page = MAP_NR(kaddr) + mem_map;
- if (PageBIGMEM(bigmem_page)) {
- unsigned long vaddr;
-
- vaddr = kmap(kaddr, KM_WRITE);
- copy_page(vaddr, page_address(page));
- kunmap(vaddr, KM_WRITE);
-
- /* Preserve the caching of the swap_entry. */
- bigmem_page->offset = page->offset;
-
- /* We can just forget the old page since
- we stored its data into the new
- bigmem_page. */
- __free_page(page);
-
- page = bigmem_page;
- }
- }
- }
- return page;
-}
--- /dev/null
+/*
+ * linux/mm/bootmem.c
+ *
+ * Copyright (C) 1999 Ingo Molnar
+ *
+ * Simple boot-time physical memory area allocator and
+ * free memory collector. It's used to deal with reserved
+ * system memory and memory holes as well.
+ */
+
+#include <linux/config.h>
+#include <linux/mm.h>
+#include <linux/kernel_stat.h>
+#include <linux/swap.h>
+#include <linux/swapctl.h>
+#include <linux/interrupt.h>
+#include <linux/init.h>
+#include <linux/bootmem.h>
+
+/*
+ * Pointer to a bitmap - the bits represent all physical memory pages
+ * from physical address 0 to physical address end_mem.
+ *
+ * Access to this subsystem has to be serialized externally. (this is
+ * true for the boot process anyway)
+ */
+static void * bootmem_map = NULL;
+unsigned long max_low_pfn;
+
+/*
+ * Called once to set up the allocator itself.
+ */
+unsigned long __init init_bootmem (unsigned long start, unsigned long pages)
+{
+ unsigned long mapsize = (pages+7)/8;
+
+ if (bootmem_map)
+ BUG();
+ bootmem_map = __va(start << PAGE_SHIFT);
+ max_low_pfn = pages;
+
+ /*
+ * Initially all pages are reserved - setup_arch() has to
+ * register free RAM areas explicitly.
+ */
+ memset(bootmem_map, 0xff, mapsize);
+
+ return mapsize;
+}
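+
+/*
+ * Typical boot-time usage, sketched - the real calls live in the
+ * architecture's setup code, and the names below are illustrative:
+ *
+ *	mapsize = init_bootmem(bitmap_pfn, max_low_pfn);
+ *	free_bootmem(usable_ram_start, usable_ram_size);
+ *	reserve_bootmem(bitmap_pfn << PAGE_SHIFT, mapsize);
+ */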
+
+/*
+ * Marks a particular physical memory range as reserved. Reserved
+ * pages are never handed out by the boot-time allocator and do not
+ * get added to the free page pool later on.
+ */
+void __init reserve_bootmem (unsigned long addr, unsigned long size)
+{
+ unsigned long i;
+ /*
+ * round up, partially reserved pages are considered
+ * fully reserved.
+ */
+ unsigned long end = (addr + size + PAGE_SIZE-1)/PAGE_SIZE;
+
+ if (!bootmem_map) BUG();
+ if (!size) BUG();
+
+ if (end > max_low_pfn)
+ BUG();
+ for (i = addr/PAGE_SIZE; i < end; i++)
+ if (test_and_set_bit(i, bootmem_map))
+ BUG();
+}
+
+void __init free_bootmem (unsigned long addr, unsigned long size)
+{
+ unsigned long i;
+ /*
+  * round up the start and round down the end of the usable
+  * area - partially free pages are considered reserved.
+  */
+ unsigned long end = (addr + size)/PAGE_SIZE;
+
+ if (!bootmem_map) BUG();
+ if (!size) BUG();
+
+ if (end > max_low_pfn)
+ BUG();
+ for (i = (addr + PAGE_SIZE-1)/PAGE_SIZE; i < end; i++) {
+ if (!test_and_clear_bit(i, bootmem_map))
+ BUG();
+ }
+}
+
+/*
+ * We 'merge' subsequent allocations to save space. We might 'lose'
+ * some fraction of a page if allocations cannot be satisfied due to
+ * size constraints on boxes where there is physical RAM space
+ * fragmentation - in these cases * (mostly large memory boxes) this
+ * is not a problem.
+ *
+ * On low memory boxes we get it right in 100% of the cases.
+ */
+static unsigned long last_pos = 0;
+static unsigned long last_offset = 0;
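+
+/*
+ * Example: two back-to-back alloc_bootmem(100) calls share one page -
+ * assuming a 32-byte SMP_CACHE_BYTES, the second allocation starts at
+ * offset 128 (100 rounded up) instead of consuming a fresh page.
+ */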
+
+/*
+ * alignment has to be a power of 2 value.
+ */
+void * __init __alloc_bootmem (unsigned long size, unsigned long align)
+{
+ int area = 0;
+ unsigned long i, start = 0, reserved;
+ void *ret;
+ unsigned long offset, remaining_size;
+ unsigned long areasize;
+
+ if (!bootmem_map) BUG();
+ if (!size) BUG();
+
+ areasize = (size+PAGE_SIZE-1)/PAGE_SIZE;
+
+ for (i = 0; i < max_low_pfn; i++) {
+ reserved = test_bit(i, bootmem_map);
+ if (!reserved) {
+ if (!area) {
+ area = 1;
+ start = i;
+ }
+ if (i - start + 1 == areasize)
+ goto found;
+ } else {
+ area = 0;
+ start = -1;
+ }
+ }
+ BUG();
+found:
+ if (start >= max_low_pfn)
+ BUG();
+
+ /*
+ * Is the next page of the previous allocation-end the start
+ * of this allocation's buffer? If yes then we can 'merge'
+ * the previous partial page with this allocation.
+ */
+ if (last_offset && (last_pos+1 == start)) {
+ offset = (last_offset+align-1) & ~(align-1);
+ if (offset > PAGE_SIZE)
+ BUG();
+ remaining_size = PAGE_SIZE-offset;
+ if (remaining_size > PAGE_SIZE)
+ BUG();
+ if (size < remaining_size) {
+ areasize = 0;
+ // last_pos unchanged
+ /* last_pos unchanged */
+ ret = __va(last_pos*PAGE_SIZE + offset);
+ } else {
+ size -= remaining_size;
+ areasize = (size+PAGE_SIZE-1)/PAGE_SIZE;
+ ret = __va(last_pos*PAGE_SIZE + offset);
+ last_pos = start+areasize-1;
+ last_offset = size;
+ }
+ last_offset &= ~PAGE_MASK;
+ } else {
+ last_pos = start + areasize - 1;
+ last_offset = size & ~PAGE_MASK;
+ ret = __va(start * PAGE_SIZE);
+ }
+ /*
+ * Reserve the area now:
+ */
+ for (i = start; i < start+areasize; i++)
+ if (test_and_set_bit(i, bootmem_map))
+ BUG();
+
+ return ret;
+}
+
+unsigned long __init free_all_bootmem (void)
+{
+ struct page * page;
+ unsigned long i, count, total = 0;
+
+ if (!bootmem_map) BUG();
+
+ printk("freeing all unused boot memory.\n");
+ page = mem_map;
+ count = 0;
+ for (i = 0; i < max_low_pfn; i++, page++) {
+ if (!test_bit(i, bootmem_map)) {
+ count++;
+ ClearPageReserved(page);
+ set_page_count(page, 1);
+ __free_page(page);
+ }
+ }
+ total += count;
+ /*
+ * Now free the allocator bitmap itself, it's not
+ * needed anymore:
+ */
+ page = mem_map + MAP_NR(bootmem_map);
+ count = 0;
+ for (i = 0; i < (max_low_pfn/8 + PAGE_SIZE-1)/PAGE_SIZE; i++,page++) {
+ count++;
+ ClearPageReserved(page);
+ set_page_count(page, 1);
+ __free_page(page);
+ }
+ total += count;
+ bootmem_map = NULL;
+
+ return total;
+}
#include <linux/swapctl.h>
#include <linux/slab.h>
#include <linux/init.h>
+#include <linux/highmem.h>
#include <asm/pgtable.h>
#include <asm/uaccess.h>
atomic_dec(&page_cache_size);
}
-static void remove_page_from_inode_queue(struct page * page)
-{
- struct inode * inode = page->inode;
- struct page *prev, *next;
-
- inode->i_nrpages--;
- next = page->next;
- prev = page->prev;
- if (inode->i_pages == page)
- inode->i_pages = next;
- if (next)
- next->prev = prev;
- if (prev)
- prev->next = next;
- page->next = NULL;
- page->prev = NULL;
-}
-
/*
* Remove a page from the page cache and free it. Caller has to make
* sure the page is locked and that nobody else uses it - or that usage
void invalidate_inode_pages(struct inode * inode)
{
- struct page ** p;
+ struct list_head *head, *curr;
struct page * page;
+ head = &inode->i_pages;
repeat:
spin_lock(&pagecache_lock);
- p = &inode->i_pages;
- while ((page = *p) != NULL) {
+ curr = head->next;
+
+ while (curr != head) {
+ page = list_entry(curr, struct page, list);
+ curr = curr->next;
get_page(page);
if (TryLockPage(page)) {
spin_unlock(&pagecache_lock);
UnlockPage(page);
page_cache_release(page);
page_cache_release(page);
-
}
spin_unlock(&pagecache_lock);
}
*/
void truncate_inode_pages(struct inode * inode, unsigned long start)
{
- struct page ** p;
+ struct list_head *head, *curr;
+ unsigned long offset;
struct page * page;
int partial = 0;
repeat:
+ head = &inode->i_pages;
spin_lock(&pagecache_lock);
- p = &inode->i_pages;
- while ((page = *p) != NULL) {
- unsigned long offset = page->offset;
+ curr = head->next;
+ while (curr != head) {
+
+ page = list_entry(curr, struct page, list);
+ curr = curr->next;
+
+ offset = page->offset;
/* page wholly truncated - free it */
if (offset >= start) {
*/
goto repeat;
}
- p = &page->next;
/*
* there is only one partial page possible.
*/
offset = start - offset;
/* partial truncate, clear end of page */
if (offset < PAGE_CACHE_SIZE) {
- unsigned long address;
get_page(page);
spin_unlock(&pagecache_lock);
lock_page(page);
partial = 1;
- address = page_address(page);
- memset((void *) (offset + address), 0, PAGE_CACHE_SIZE - offset);
- flush_page_to_ram(address);
-
+ memclear_highpage_flush(page, offset,
+ PAGE_CACHE_SIZE-offset);
if (inode->i_op->flushpage)
inode->i_op->flushpage(inode, page, offset);
/*
/* don't account passes over not DMA pages */
if ((gfp_mask & __GFP_DMA) && !PageDMA(page))
goto dispose_continue;
- if (!(gfp_mask & __GFP_BIGMEM) && PageBIGMEM(page))
+ if (!(gfp_mask & __GFP_HIGHMEM) && PageHighMem(page))
goto dispose_continue;
count--;
goto unlock_continue;
/* page was locked, inode can't go away under us */
if (!page->inode) {
- atomic_sub(PAGE_CACHE_SIZE, &buffermem);
+ atomic_dec(&buffermem_pages);
goto made_buffer_progress;
}
spin_lock(&pagecache_lock);
static int do_buffer_fdatasync(struct inode *inode, unsigned long start, unsigned long end, int (*fn)(struct page *))
{
- struct page *next;
+ struct list_head *head, *curr;
+ struct page *page;
int retval = 0;
+ head = &inode->i_pages;
start &= PAGE_MASK;
spin_lock(&pagecache_lock);
- next = inode->i_pages;
- while (next) {
- struct page *page = next;
- next = page->next;
+ curr = head->next;
+ while (curr != head) {
+ page = list_entry(curr, struct page, list);
+ curr = curr->next;
if (!page->buffers)
continue;
if (page->offset >= end)
UnlockPage(page);
spin_lock(&pagecache_lock);
- next = page->next;
+ curr = page->list.next;
page_cache_release(page);
}
spin_unlock(&pagecache_lock);
struct inode * inode, unsigned long offset,
struct page **hash)
{
+ struct page *alias;
unsigned long flags;
flags = page->flags & ~((1 << PG_uptodate) | (1 << PG_error) | (1 << PG_referenced));
add_page_to_inode_queue(inode, page);
__add_page_to_hash_queue(page, hash);
lru_cache_add(page);
+ alias = __find_page_nolock(inode, offset, *hash);
+ if (alias != page)
+ BUG();
}
void add_to_page_cache(struct page * page, struct inode * inode, unsigned long offset)
*/
static inline void page_cache_read(struct file * file, unsigned long offset)
{
- unsigned long new_page;
struct inode *inode = file->f_dentry->d_inode;
- struct page ** hash = page_hash(inode, offset);
- struct page * page;
+ struct page **hash = page_hash(inode, offset);
+ struct page *page;
spin_lock(&pagecache_lock);
page = __find_page_nolock(inode, offset, *hash);
if (page)
return;
- new_page = page_cache_alloc();
- if (!new_page)
+ page = page_cache_alloc();
+ if (!page)
return;
- page = page_cache_entry(new_page);
if (!add_to_page_cache_unique(page, inode, offset, hash)) {
inode->i_op->readpage(file, page);
page_cache_release(page);
return;
}
-
/*
* We arrive here in the unlikely event that someone
* raced with us and added our page to the cache first.
*/
- page_cache_free(new_page);
+ page_cache_free(page);
return;
}
{
struct dentry *dentry = filp->f_dentry;
struct inode *inode = dentry->d_inode;
- size_t pos, pgpos, page_cache;
+ size_t pos, pgpos;
+ struct page *cached_page;
int reada_ok;
int error;
int max_readahead = get_max_readahead(inode);
- page_cache = 0;
-
+ cached_page = NULL;
pos = *ppos;
pgpos = pos & PAGE_CACHE_MASK;
/*
* "pos" here (the actor routine has to update the user buffer
* pointers and the remaining count).
*/
- nr = actor(desc, (const char *) (page_address(page) + offset), nr);
+ nr = actor(desc, page, offset, nr);
pos += nr;
page_cache_release(page);
if (nr && desc->count)
*
* We get here with the page cache lock held.
*/
- if (!page_cache) {
+ if (!cached_page) {
spin_unlock(&pagecache_lock);
- page_cache = page_cache_alloc();
- if (!page_cache) {
+ cached_page = page_cache_alloc();
+ if (!cached_page) {
desc->error = -ENOMEM;
break;
}
/*
* Ok, add the new page to the hash-queues...
*/
- page = page_cache_entry(page_cache);
+ page = cached_page;
__add_to_page_cache(page, inode, pos & PAGE_CACHE_MASK, hash);
spin_unlock(&pagecache_lock);
+ cached_page = NULL;
- page_cache = 0;
goto readpage;
}
*ppos = pos;
filp->f_reada = 1;
- if (page_cache)
- page_cache_free(page_cache);
+ if (cached_page)
+ page_cache_free(cached_page);
UPDATE_ATIME(inode);
}
-static int file_read_actor(read_descriptor_t * desc, const char *area, unsigned long size)
+static int file_read_actor(read_descriptor_t * desc, struct page *page, unsigned long offset, unsigned long size)
{
- unsigned long left;
- unsigned long count = desc->count;
+ unsigned long kaddr;
+ unsigned long left, count = desc->count;
if (size > count)
size = count;
- left = __copy_to_user(desc->buf, area, size);
+ /*
+ * FIXME: We cannot yet sleep with kmaps held.
+ */
+ kaddr = kmap(page, KM_READ);
+ left = __copy_to_user(desc->buf, (void *)(kaddr+offset), size);
+ kunmap(kaddr, KM_READ);
+
if (left) {
size -= left;
desc->error = -EFAULT;
return retval;
}
-static int file_send_actor(read_descriptor_t * desc, const char *area, unsigned long size)
+static int file_send_actor(read_descriptor_t * desc, struct page *page, unsigned long offset , unsigned long size)
{
+ unsigned long kaddr;
ssize_t written;
unsigned long count = desc->count;
struct file *file = (struct file *) desc->buf;
size = count;
old_fs = get_fs();
set_fs(KERNEL_DS);
- written = file->f_op->write(file, area, size, &file->f_pos);
+ kaddr = kmap(page, KM_READ);
+ written = file->f_op->write(file, (char *)kaddr + offset, size, &file->f_pos);
+ kunmap(kaddr, KM_READ);
set_fs(old_fs);
if (written < 0) {
desc->error = written;
* XXX - at some point, this should return unique values to indicate to
* the caller whether this is EIO, OOM, or SIGBUS.
*/
-static unsigned long filemap_nopage(struct vm_area_struct * area,
+static struct page * filemap_nopage(struct vm_area_struct * area,
unsigned long address, int no_share)
{
- struct file * file = area->vm_file;
- struct dentry * dentry = file->f_dentry;
- struct inode * inode = dentry->d_inode;
- struct page * page, **hash;
- unsigned long old_page;
+ struct file *file = area->vm_file;
+ struct dentry *dentry = file->f_dentry;
+ struct inode *inode = dentry->d_inode;
+ struct page *page, **hash, *old_page;
unsigned long offset = address - area->vm_start + area->vm_offset;
*/
if ((offset >= inode->i_size) &&
(area->vm_flags & VM_SHARED) && (area->vm_mm == current->mm))
- return 0;
+ return NULL;
/*
* Do we have something in the page cache already?
* Found the page and have a reference on it, need to check sharing
* and possibly copy it over to another page..
*/
- old_page = page_address(page);
+ old_page = page;
if (no_share) {
- unsigned long new_page = page_cache_alloc();
+ struct page *new_page = page_cache_alloc();
if (new_page) {
- copy_page(new_page, old_page);
+ if (PageHighMem(new_page) || PageHighMem(old_page))
+ BUG();
+ copy_highpage(new_page, old_page);
flush_page_to_ram(new_page);
}
page_cache_release(page);
* mm layer so, possibly freeing the page cache page first.
*/
page_cache_release(page);
- return 0;
+ return NULL;
}
/*
* if the disk is full.
*/
static inline int do_write_page(struct inode * inode, struct file * file,
- const char * page_addr, unsigned long offset)
+ struct page * page, unsigned long offset)
{
int retval;
unsigned long size;
int (*writepage) (struct file *, struct page *);
- struct page * page;
size = offset + PAGE_SIZE;
/* refuse to extend file size.. */
size -= offset;
retval = -EIO;
writepage = inode->i_op->writepage;
- page = mem_map + MAP_NR(page_addr);
lock_page(page);
retval = writepage(file, page);
static int filemap_write_page(struct vm_area_struct * vma,
unsigned long offset,
- unsigned long page,
+ struct page * page,
int wait)
{
int result;
* and file could be released ... increment the count to be safe.
*/
get_file(file);
- result = do_write_page(inode, file, (const char *) page, offset);
+ result = do_write_page(inode, file, page, offset);
fput(file);
return result;
}
extern void wakeup_bdflush(int);
int filemap_swapout(struct vm_area_struct * vma, struct page * page)
{
- int retval = filemap_write_page(vma, page->offset, page_address(page), 0);
+ int retval = filemap_write_page(vma, page->offset, page, 0);
wakeup_bdflush(0);
return retval;
}
unsigned long address, unsigned int flags)
{
pte_t pte = *ptep;
- unsigned long pageaddr;
struct page *page;
int error;
flush_cache_page(vma, address);
set_pte(ptep, pte_mkclean(pte));
flush_tlb_page(vma, address);
- pageaddr = pte_page(pte);
- page = page_cache_entry(pageaddr);
+ page = pte_page(pte);
get_page(page);
} else {
if (pte_none(pte))
pte_clear(ptep);
flush_tlb_page(vma, address);
if (!pte_present(pte)) {
- swap_free(pte_val(pte));
+ swap_free(pte);
return 0;
}
- pageaddr = pte_page(pte);
+ page = pte_page(pte);
if (!pte_dirty(pte) || flags == MS_INVALIDATE) {
- page_cache_free(pageaddr);
+ page_cache_free(page);
return 0;
}
}
- error = filemap_write_page(vma, address - vma->vm_start + vma->vm_offset, pageaddr, 1);
- page_cache_free(pageaddr);
+ if (PageHighMem(page))
+ BUG();
+ error = filemap_write_page(vma, address - vma->vm_start + vma->vm_offset, page, 1);
+ page_cache_free(page);
return error;
}
if (pmd_none(*pmd))
return 0;
if (pmd_bad(*pmd)) {
- printk("filemap_sync_pte_range: bad pmd (%08lx)\n", pmd_val(*pmd));
+ pmd_ERROR(*pmd);
pmd_clear(pmd);
return 0;
}
error |= filemap_sync_pte(pte, vma, address + offset, flags);
address += PAGE_SIZE;
pte++;
- } while (address < end);
+ } while (address && (address < end));
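+ /* the extra 'address' test catches wraparound to 0 at the very top
+    of the address space */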
return error;
}
if (pgd_none(*pgd))
return 0;
if (pgd_bad(*pgd)) {
- printk("filemap_sync_pmd_range: bad pgd (%08lx)\n", pgd_val(*pgd));
+ pgd_ERROR(*pgd);
pgd_clear(pgd);
return 0;
}
error |= filemap_sync_pte_range(pmd, address, end - address, vma, offset, flags);
address = (address + PMD_SIZE) & PMD_MASK;
pmd++;
- } while (address < end);
+ } while (address && (address < end));
return error;
}
dir = pgd_offset(vma->vm_mm, address);
flush_cache_range(vma->vm_mm, end - size, end);
- while (address < end) {
+ if (address >= end)
+ BUG();
+ do {
error |= filemap_sync_pmd_range(dir, address, end - address, vma, flags);
address = (address + PGDIR_SIZE) & PGDIR_MASK;
dir++;
- }
+ } while (address && (address < end));
flush_tlb_range(vma->vm_mm, end - size, end);
return error;
}
struct inode *inode = dentry->d_inode;
unsigned long pos = *ppos;
unsigned long limit = current->rlim[RLIMIT_FSIZE].rlim_cur;
- struct page *page, **hash;
- unsigned long page_cache = 0;
+ struct page *page, **hash, *cached_page;
unsigned long written;
long status;
int err;
+ cached_page = NULL;
+
down(&inode->i_sem);
err = file->f_error;
if (err) {
repeat_find:
page = __find_lock_page(inode, pgpos, hash);
if (!page) {
- if (!page_cache) {
- page_cache = page_cache_alloc();
- if (page_cache)
+ if (!cached_page) {
+ cached_page = page_cache_alloc();
+ if (cached_page)
goto repeat_find;
status = -ENOMEM;
break;
}
- page = page_cache_entry(page_cache);
+ page = cached_page;
if (add_to_page_cache_unique(page,inode,pgpos,hash))
goto repeat_find;
- page_cache = 0;
+ cached_page = NULL;
}
/* We have exclusive IO access to the page.. */
}
*ppos = pos;
- if (page_cache)
- page_cache_free(page_cache);
+ if (cached_page)
+ page_cache_free(cached_page);
err = written ? written : status;
out:
page_cache_release(page);
}
-void __init page_cache_init(unsigned long memory_size)
+void __init page_cache_init(unsigned long mempages)
{
unsigned long htable_size, order;
- htable_size = memory_size >> PAGE_SHIFT;
+ htable_size = mempages;
htable_size *= sizeof(struct page *);
for(order = 0; (PAGE_SIZE << order) < htable_size; order++)
;
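+ /* e.g. 16384 pages (64MB) want 16384 * sizeof(struct page *) = 64KB
+    of table on a 32-bit box, i.e. order 4 */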
(1 << page_hash_bits), order, (PAGE_SIZE << order));
if (!page_hash_table)
panic("Failed to allocate page hash table\n");
- memset(page_hash_table, 0, PAGE_HASH_SIZE * sizeof(struct page *));
+ memset((void *)page_hash_table, 0, PAGE_HASH_SIZE * sizeof(struct page *));
}
--- /dev/null
+/*
+ * High memory handling common code and variables.
+ *
+ * (C) 1999 Andrea Arcangeli, SuSE GmbH, andrea@suse.de
+ * Gerhard Wichert, Siemens AG, Gerhard.Wichert@pdb.siemens.de
+ *
+ * Redesigned the x86 32-bit VM architecture to deal with
+ * 64-bit physical space. With current x86 CPUs this
+ * means up to 64 Gigabytes physical RAM.
+ *
+ * Copyright (C) 1999 Ingo Molnar <mingo@redhat.com>
+ */
+
+#include <linux/mm.h>
+#include <linux/pagemap.h>
+#include <linux/highmem.h>
+
+unsigned long highmem_mapnr;
+unsigned long nr_free_highpages = 0;
+
+struct page * prepare_highmem_swapout(struct page * page)
+{
+ unsigned long regular_page;
+ unsigned long vaddr;
+ /*
+ * A highmem page can't be swapped out directly, otherwise the
+ * b_data buffer addresses would break the lowlevel device drivers.
+ */
+ if (!PageHighMem(page))
+ return page;
+
+ regular_page = __get_free_page(GFP_ATOMIC);
+ if (!regular_page)
+ return NULL;
+
+ vaddr = kmap(page, KM_READ);
+ copy_page((void *)regular_page, (void *)vaddr);
+ kunmap(vaddr, KM_READ);
+
+ /*
+ * ok, we can just forget about our highmem page since
+ * we stored its data into the new regular_page.
+ */
+ __free_page(page);
+
+ return mem_map + MAP_NR(regular_page);
+}
+
+struct page * replace_with_highmem(struct page * page)
+{
+ struct page *highpage;
+ unsigned long vaddr;
+
+ if (PageHighMem(page) || !nr_free_highpages)
+ return page;
+
+ highpage = get_free_highpage(GFP_ATOMIC|__GFP_HIGHMEM);
+ if (!highpage)
+ return page;
+ if (!PageHighMem(highpage)) {
+ __free_page(highpage);
+ return page;
+ }
+
+ vaddr = kmap(highpage, KM_WRITE);
+ copy_page((void *)vaddr, (void *)page_address(page));
+ kunmap(vaddr, KM_WRITE);
+
+ /* Preserve the caching of the swap_entry. */
+ highpage->offset = page->offset;
+ highpage->inode = page->inode;
+
+ /*
+ * We can just forget the old page since
+ * we stored its data into the new highmem-page.
+ */
+ __free_page(page);
+
+ return highpage;
+}
#include <linux/smp_lock.h>
#include <linux/swapctl.h>
#include <linux/iobuf.h>
-#include <linux/bigmem.h>
+#include <linux/highmem.h>
#include <asm/uaccess.h>
#include <asm/pgtable.h>
unsigned long max_mapnr = 0;
unsigned long num_physpages = 0;
void * high_memory = NULL;
+struct page *highmem_start_page;
/*
* We special-case the C-O-W ZERO_PAGE, because it's such
* a common occurrence (no need to read the page to know
* that it's zero - better for the cache and memory subsystem).
*/
-static inline void copy_cow_page(unsigned long from, unsigned long to)
+static inline void copy_cow_page(struct page * from, struct page * to)
{
if (from == ZERO_PAGE(to)) {
- clear_bigpage(to);
+ clear_highpage(to);
return;
}
- copy_bigpage(to, from);
+ copy_highpage(to, from);
}
mem_map_t * mem_map = NULL;
if (pmd_none(*dir))
return;
if (pmd_bad(*dir)) {
- printk("free_one_pmd: bad directory entry %08lx\n", pmd_val(*dir));
+ pmd_ERROR(*dir);
pmd_clear(dir);
return;
}
if (pgd_none(*dir))
return;
if (pgd_bad(*dir)) {
- printk("free_one_pgd: bad directory entry %08lx\n", pgd_val(*dir));
+ pgd_ERROR(*dir);
pgd_clear(dir);
return;
}
if (pgd_none(*src_pgd))
goto skip_copy_pmd_range;
if (pgd_bad(*src_pgd)) {
- printk("copy_pmd_range: bad pgd (%08lx)\n",
- pgd_val(*src_pgd));
+ pgd_ERROR(*src_pgd);
pgd_clear(src_pgd);
skip_copy_pmd_range: address = (address + PGDIR_SIZE) & PGDIR_MASK;
- if (address >= end)
+ if (!address || (address >= end))
goto out;
continue;
}
if (pmd_none(*src_pmd))
goto skip_copy_pte_range;
if (pmd_bad(*src_pmd)) {
- printk("copy_pte_range: bad pmd (%08lx)\n", pmd_val(*src_pmd));
+ pmd_ERROR(*src_pmd);
pmd_clear(src_pmd);
skip_copy_pte_range: address = (address + PMD_SIZE) & PMD_MASK;
if (address >= end)
if (pte_none(pte))
goto cont_copy_pte_range;
if (!pte_present(pte)) {
- swap_duplicate(pte_val(pte));
+ swap_duplicate(pte);
set_pte(dst_pte, pte);
goto cont_copy_pte_range;
}
- page_nr = MAP_NR(pte_page(pte));
+ page_nr = pte_pagenr(pte);
if (page_nr >= max_mapnr ||
PageReserved(mem_map+page_nr)) {
set_pte(dst_pte, pte);
static inline int free_pte(pte_t page)
{
if (pte_present(page)) {
- unsigned long addr = pte_page(page);
- if (MAP_NR(addr) >= max_mapnr || PageReserved(mem_map+MAP_NR(addr)))
+ unsigned long nr = pte_pagenr(page);
+ if (nr >= max_mapnr || PageReserved(mem_map+nr))
return 0;
/*
* free_page() used to be able to clear swap cache
* entries. We may now have to do it manually.
*/
- free_page_and_swap_cache(addr);
+ free_page_and_swap_cache(mem_map+nr);
return 1;
}
- swap_free(pte_val(page));
+ swap_free(page);
return 0;
}
if (pmd_none(*pmd))
return 0;
if (pmd_bad(*pmd)) {
- printk("zap_pte_range: bad pmd (%08lx)\n", pmd_val(*pmd));
+ pmd_ERROR(*pmd);
pmd_clear(pmd);
return 0;
}
if (pgd_none(*dir))
return 0;
if (pgd_bad(*dir)) {
- printk("zap_pmd_range: bad pgd (%08lx)\n", pgd_val(*dir));
+ pgd_ERROR(*dir);
pgd_clear(dir);
return 0;
}
* even if kswapd happened to be looking at this
* process we _want_ it to get stuck.
*/
+ if (address >= end)
+ BUG();
spin_lock(&mm->page_table_lock);
- while (address < end) {
+ do {
freed += zap_pmd_range(mm, dir, address, end - address);
address = (address + PGDIR_SIZE) & PGDIR_MASK;
dir++;
- }
+ } while (address && (address < end));
spin_unlock(&mm->page_table_lock);
/*
* Update rss for the mm_struct (not necessarily current->mm)
/*
* Do a quick page-table lookup for a single page.
*/
-static unsigned long follow_page(unsigned long address)
+static struct page * follow_page(unsigned long address)
{
pgd_t *pgd;
pmd_t *pmd;
pmd = pmd_offset(pgd, address);
if (pmd) {
pte_t * pte = pte_offset(pmd, address);
- if (pte && pte_present(*pte)) {
+ if (pte && pte_present(*pte))
return pte_page(*pte);
- }
}
printk(KERN_ERR "Missing page in follow_page\n");
- return 0;
+ return NULL;
}
/*
* Given a physical address, is there a useful struct page pointing to it?
*/
-static struct page * get_page_map(unsigned long page)
+struct page * get_page_map(struct page *page)
{
- struct page *map;
-
if (MAP_NR(page) >= max_mapnr)
return 0;
if (page == ZERO_PAGE(page))
return 0;
- map = mem_map + MAP_NR(page);
- if (PageReserved(map))
+ if (PageReserved(page))
return 0;
- return map;
+ return page;
}
/*
int err;
struct mm_struct * mm;
struct vm_area_struct * vma = 0;
- unsigned long page;
struct page * map;
int doublepage = 0;
int repeat = 0;
if (handle_mm_fault(current, vma, ptr, (rw==READ)) <= 0)
goto out_unlock;
spin_lock(&mm->page_table_lock);
- page = follow_page(ptr);
- if (!page) {
+ map = follow_page(ptr);
+ if (!map) {
dprintk (KERN_ERR "Missing page in map_user_kiobuf\n");
- map = NULL;
goto retry;
}
- map = get_page_map(page);
+ map = get_page_map(map);
if (map) {
if (TryLockPage(map)) {
goto retry;
atomic_inc(&map->count);
}
spin_unlock(&mm->page_table_lock);
- dprintk ("Installing page %p %p: %d\n", (void *)page, map, i);
- iobuf->pagelist[i] = page;
iobuf->maplist[i] = map;
iobuf->nr_pages = ++i;
if (end > PMD_SIZE)
end = PMD_SIZE;
do {
- pte_t zero_pte = pte_wrprotect(mk_pte(ZERO_PAGE(address),
- prot));
+ pte_t zero_pte = pte_wrprotect(mk_pte(ZERO_PAGE(address), prot));
pte_t oldpage = *pte;
set_pte(pte, zero_pte);
forget_pte(oldpage);
address += PAGE_SIZE;
pte++;
- } while (address < end);
+ } while (address && (address < end));
}
static inline int zeromap_pmd_range(pmd_t * pmd, unsigned long address,
zeromap_pte_range(pte, address, end - address, prot);
address = (address + PMD_SIZE) & PMD_MASK;
pmd++;
- } while (address < end);
+ } while (address && (address < end));
return 0;
}
dir = pgd_offset(current->mm, address);
flush_cache_range(current->mm, beg, end);
- while (address < end) {
+ if (address >= end)
+ BUG();
+ do {
pmd_t *pmd = pmd_alloc(dir, address);
error = -ENOMEM;
if (!pmd)
break;
address = (address + PGDIR_SIZE) & PGDIR_MASK;
dir++;
- }
+ } while (address && (address < end));
flush_tlb_range(current->mm, beg, end);
return error;
}
address += PAGE_SIZE;
phys_addr += PAGE_SIZE;
pte++;
- } while (address < end);
+ } while (address && (address < end));
}
static inline int remap_pmd_range(pmd_t * pmd, unsigned long address, unsigned long size,
remap_pte_range(pte, address, end - address, address + phys_addr, prot);
address = (address + PMD_SIZE) & PMD_MASK;
pmd++;
- } while (address < end);
+ } while (address && (address < end));
return 0;
}
phys_addr -= from;
dir = pgd_offset(current->mm, from);
flush_cache_range(current->mm, beg, end);
- while (from < end) {
+ if (from >= end)
+ BUG();
+ do {
pmd_t *pmd = pmd_alloc(dir, from);
error = -ENOMEM;
if (!pmd)
break;
from = (from + PGDIR_SIZE) & PGDIR_MASK;
dir++;
- }
+ } while (from && (from < end));
flush_tlb_range(current->mm, beg, end);
return error;
}
* This routine is used to map in a page into an address space: needed by
* execve() for the initial stack and environment pages.
*/
-unsigned long put_dirty_page(struct task_struct * tsk, unsigned long page, unsigned long address)
+struct page * put_dirty_page(struct task_struct * tsk, struct page *page,
+ unsigned long address)
{
pgd_t * pgd;
pmd_t * pmd;
pte_t * pte;
- if (MAP_NR(page) >= max_mapnr)
- printk("put_dirty_page: trying to put page %08lx at %08lx\n",page,address);
- if (page_count(mem_map + MAP_NR(page)) != 1)
- printk("mem_map disagrees with %08lx at %08lx\n",page,address);
- pgd = pgd_offset(tsk->mm,address);
+ if (page_count(page) != 1)
+ printk("mem_map disagrees with %p at %08lx\n", page, address);
+ pgd = pgd_offset(tsk->mm, address);
pmd = pmd_alloc(pgd, address);
if (!pmd) {
- free_page(page);
+ __free_page(page);
oom(tsk);
return 0;
}
pte = pte_alloc(pmd, address);
if (!pte) {
- free_page(page);
+ __free_page(page);
oom(tsk);
return 0;
}
if (!pte_none(*pte)) {
- printk("put_dirty_page: pte %08lx already exists\n",
- pte_val(*pte));
- free_page(page);
+ pte_ERROR(*pte);
+ __free_page(page);
return 0;
}
- flush_page_to_ram(page);
- set_pte(pte, pte_mkwrite(pte_mkdirty(mk_pte(page, PAGE_COPY))));
+ flush_page_to_ram(pte_page(page));
+ set_pte(pte, pte_mkwrite(page_pte_prot(page, PAGE_COPY)));
/* no need for flush_tlb */
return page;
}
static int do_wp_page(struct task_struct * tsk, struct vm_area_struct * vma,
unsigned long address, pte_t *page_table, pte_t pte)
{
- unsigned long old_page, new_page;
- struct page * page;
+ unsigned long map_nr;
+ struct page *old_page, *new_page;
- old_page = pte_page(pte);
- if (MAP_NR(old_page) >= max_mapnr)
+ map_nr = pte_pagenr(pte);
+ if (map_nr >= max_mapnr)
goto bad_wp_page;
tsk->min_flt++;
- page = mem_map + MAP_NR(old_page);
+ old_page = mem_map + map_nr;
/*
* We can avoid the copy if:
* in which case we can remove the page
* from the swap cache.
*/
- switch (page_count(page)) {
+ switch (page_count(old_page)) {
case 2:
- if (!PageSwapCache(page))
+ if (!PageSwapCache(old_page))
break;
- if (swap_count(page->offset) != 1)
+ if (swap_count(old_page) != 1)
break;
- delete_from_swap_cache(page);
+ delete_from_swap_cache(old_page);
/* FallThrough */
case 1:
flush_cache_page(vma, address);
* Ok, we need to copy. Oh, well..
*/
spin_unlock(&tsk->mm->page_table_lock);
- new_page = __get_free_page(GFP_BIGUSER);
+ new_page = get_free_highpage(GFP_HIGHUSER);
if (!new_page)
return -1;
spin_lock(&tsk->mm->page_table_lock);
* Re-check the pte - we dropped the lock
*/
if (pte_val(*page_table) == pte_val(pte)) {
- if (PageReserved(page))
+ if (PageReserved(old_page))
++vma->vm_mm->rss;
- copy_cow_page(old_page,new_page);
+ copy_cow_page(old_page, new_page);
flush_page_to_ram(new_page);
flush_cache_page(vma, address);
set_pte(page_table, pte_mkwrite(pte_mkdirty(mk_pte(new_page, vma->vm_page_prot))));
new_page = old_page;
}
spin_unlock(&tsk->mm->page_table_lock);
- free_page(new_page);
+ __free_page(new_page);
return 1;
bad_wp_page:
spin_unlock(&tsk->mm->page_table_lock);
- printk("do_wp_page: bogus page at address %08lx (%08lx)\n",address,old_page);
+ printk("do_wp_page: bogus page at address %08lx (nr %ld)\n",address,map_nr);
return -1;
}
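The reuse-versus-copy decision above, in miniature: a page mapped by a single user can be handed back writable, and a page whose only extra reference is the swap cache's can be reused once that reference is dropped; anything else must be copied. The toy structure below folds the separate swap_count() test into one flag, which is a simplification:

    #include <stdio.h>

    struct toy_page { int count; int in_swap_cache; };

    /* returns 1 if the write fault may reuse the page in place,
     * 0 if it has to copy (the COW slow path) */
    static int can_reuse(struct toy_page *p)
    {
        switch (p->count) {
        case 2:
            if (!p->in_swap_cache)
                return 0;
            p->in_swap_cache = 0;   /* drop the swap-cache reference */
            p->count--;
            /* fall through */
        case 1:
            return 1;
        default:
            return 0;
        }
    }

    int main(void)
    {
        struct toy_page lone = { 1, 0 }, cached = { 2, 1 }, shared = { 3, 0 };
        printf("%d %d %d\n", can_reuse(&lone),
               can_reuse(&cached), can_reuse(&shared));  /* prints: 1 1 0 */
        return 0;
    }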
*/
static void partial_clear(struct vm_area_struct *vma, unsigned long address)
{
+ unsigned int offset;
+ struct page *page;
pgd_t *page_dir;
pmd_t *page_middle;
pte_t *page_table, pte;
if (pgd_none(*page_dir))
return;
if (pgd_bad(*page_dir)) {
- printk("bad page table directory entry %p:[%lx]\n", page_dir, pgd_val(*page_dir));
+ pgd_ERROR(*page_dir);
pgd_clear(page_dir);
return;
}
if (pmd_none(*page_middle))
return;
if (pmd_bad(*page_middle)) {
- printk("bad page table directory entry %p:[%lx]\n", page_dir, pgd_val(*page_dir));
+ pmd_ERROR(*page_middle);
pmd_clear(page_middle);
return;
}
if (!pte_present(pte))
return;
flush_cache_page(vma, address);
- address &= ~PAGE_MASK;
- address += pte_page(pte);
- if (MAP_NR(address) >= max_mapnr)
+ page = pte_page(pte);
+ if (page-mem_map >= max_mapnr)
return;
- memset((void *) address, 0, PAGE_SIZE - (address & ~PAGE_MASK));
- flush_page_to_ram(pte_page(pte));
+ offset = address & ~PAGE_MASK;
+ memclear_highpage_flush(page, offset, PAGE_SIZE - offset);
}
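partial_clear() now hands the tail of the page to memclear_highpage_flush() instead of memset()ing through a direct mapping. The offset arithmetic it relies on, sketched with the usual i386 page size (the truncation address is made up):

    #include <stdio.h>

    #define PAGE_SIZE 4096UL
    #define PAGE_MASK (~(PAGE_SIZE - 1))

    int main(void)
    {
        unsigned long address = 0xb6a4;          /* hypothetical truncation point */
        unsigned int offset = address & ~PAGE_MASK;

        /* memclear_highpage_flush(page, offset, PAGE_SIZE - offset)
         * zeroes everything in the page from 'offset' onward */
        printf("clear %lu bytes at offset %u\n", PAGE_SIZE - offset, offset);
        return 0;
    }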
/*
* because it doesn't cost us any seek time. We also make sure to queue
* the 'original' request together with the readahead ones...
*/
-void swapin_readahead(unsigned long entry)
+void swapin_readahead(pte_t entry)
{
int i;
struct page *new_page;
static int do_swap_page(struct task_struct * tsk,
struct vm_area_struct * vma, unsigned long address,
- pte_t * page_table, unsigned long entry, int write_access)
+ pte_t * page_table, pte_t entry, int write_access)
{
struct page *page = lookup_swap_cache(entry);
pte_t pte;
if (!page)
return -1;
- flush_page_to_ram(page_address(page));
+ flush_page_to_ram(page);
}
vma->vm_mm->rss++;
swap_free(entry);
unlock_kernel();
- pte = mk_pte(page_address(page), vma->vm_page_prot);
+ pte = mk_pte(page, vma->vm_page_prot);
set_bit(PG_swap_entry, &page->flags);
if (write_access && !is_page_shared(page)) {
delete_from_swap_cache(page);
- page = replace_with_bigmem(page);
- pte = mk_pte(page_address(page), vma->vm_page_prot);
+ page = replace_with_highmem(page);
+ pte = mk_pte(page, vma->vm_page_prot);
pte = pte_mkwrite(pte_mkdirty(pte));
}
set_pte(page_table, pte);
*/
static int do_anonymous_page(struct task_struct * tsk, struct vm_area_struct * vma, pte_t *page_table, int write_access, unsigned long addr)
{
+ int high = 0;
+ struct page *page = NULL;
pte_t entry = pte_wrprotect(mk_pte(ZERO_PAGE(addr), vma->vm_page_prot));
if (write_access) {
- unsigned long page = __get_free_page(GFP_BIGUSER);
+ page = get_free_highpage(GFP_HIGHUSER);
if (!page)
return -1;
- clear_bigpage(page);
+ if (PageHighMem(page))
+ high = 1;
+ clear_highpage(page);
entry = pte_mkwrite(pte_mkdirty(mk_pte(page, vma->vm_page_prot)));
vma->vm_mm->rss++;
tsk->min_flt++;
static int do_no_page(struct task_struct * tsk, struct vm_area_struct * vma,
unsigned long address, int write_access, pte_t *page_table)
{
- unsigned long page;
+ struct page * new_page;
pte_t entry;
if (!vma->vm_ops || !vma->vm_ops->nopage)
* to copy, not share the page even if sharing is possible. It's
* essentially an early COW detection.
*/
- page = vma->vm_ops->nopage(vma, address & PAGE_MASK, (vma->vm_flags & VM_SHARED)?0:write_access);
- if (!page)
+ new_page = vma->vm_ops->nopage(vma, address & PAGE_MASK, (vma->vm_flags & VM_SHARED)?0:write_access);
+ if (!new_page)
return 0; /* SIGBUS - but we _really_ should know whether it is OOM or SIGBUS */
- if (page == -1)
+ if (new_page == (struct page *)-1)
return -1; /* OOM */
-
++tsk->maj_flt;
++vma->vm_mm->rss;
/*
* so we can make it writable and dirty to avoid having to
* handle that later.
*/
- flush_page_to_ram(page);
- entry = mk_pte(page, vma->vm_page_prot);
+ flush_page_to_ram(new_page);
+ entry = mk_pte(new_page, vma->vm_page_prot);
if (write_access) {
entry = pte_mkwrite(pte_mkdirty(entry));
- } else if (page_count(mem_map+MAP_NR(page)) > 1 &&
+ } else if (page_count(new_page) > 1 &&
!(vma->vm_flags & VM_SHARED))
entry = pte_wrprotect(entry);
set_pte(page_table, entry);
if (!pte_present(entry)) {
if (pte_none(entry))
return do_no_page(tsk, vma, address, write_access, pte);
- return do_swap_page(tsk, vma, address, pte, pte_val(entry), write_access);
+ return do_swap_page(tsk, vma, address, pte, entry, write_access);
}
/*
int handle_mm_fault(struct task_struct *tsk, struct vm_area_struct * vma,
unsigned long address, int write_access)
{
+ int ret = -1;
pgd_t *pgd;
pmd_t *pmd;
pgd = pgd_offset(vma->vm_mm, address);
pmd = pmd_alloc(pgd, address);
+
if (pmd) {
pte_t * pte = pte_alloc(pmd, address);
if (pte)
- return handle_pte_fault(tsk, vma, address, write_access, pte);
+ ret = handle_pte_fault(tsk, vma, address, write_access, pte);
}
- return -1;
+ return ret;
}
/*
vma = find_vma(tsk->mm, addr);
write = (vma->vm_flags & VM_WRITE) != 0;
- while (addr < end) {
+ if (addr >= end)
+ BUG();
+ do {
if (handle_mm_fault(tsk, vma, addr, write) < 0)
return -1;
addr += PAGE_SIZE;
- }
+ } while (addr < end);
return 0;
}
if (sysctl_overcommit_memory)
return 1;
- free = atomic_read(&buffermem) >> PAGE_SHIFT;
+ free = atomic_read(&buffermem_pages);
free += atomic_read(&page_cache_size);
free += nr_free_pages;
free += nr_swap_pages;
if (pmd_none(*pmd))
return;
if (pmd_bad(*pmd)) {
- printk("change_pte_range: bad pmd (%08lx)\n", pmd_val(*pmd));
+ pmd_ERROR(*pmd);
pmd_clear(pmd);
return;
}
set_pte(pte, pte_modify(entry, newprot));
address += PAGE_SIZE;
pte++;
- } while (address < end);
+ } while (address && (address < end));
}
static inline void change_pmd_range(pgd_t * pgd, unsigned long address,
if (pgd_none(*pgd))
return;
if (pgd_bad(*pgd)) {
- printk("change_pmd_range: bad pgd (%08lx)\n", pgd_val(*pgd));
+ pgd_ERROR(*pgd);
pgd_clear(pgd);
return;
}
change_pte_range(pmd, address, end - address, newprot);
address = (address + PMD_SIZE) & PMD_MASK;
pmd++;
- } while (address < end);
+ } while (address && (address < end));
}
static void change_protection(unsigned long start, unsigned long end, pgprot_t newprot)
dir = pgd_offset(current->mm, start);
flush_cache_range(current->mm, beg, end);
- while (start < end) {
+ if (start >= end)
+ BUG();
+ do {
change_pmd_range(dir, start, end - start, newprot);
start = (start + PGDIR_SIZE) & PGDIR_MASK;
dir++;
- }
+ } while (start && (start < end));
flush_tlb_range(current->mm, beg, end);
return;
}
if (pgd_none(*pgd))
goto end;
if (pgd_bad(*pgd)) {
- printk("move_one_page: bad source pgd (%08lx)\n", pgd_val(*pgd));
+ pgd_ERROR(*pgd);
pgd_clear(pgd);
goto end;
}
if (pmd_none(*pmd))
goto end;
if (pmd_bad(*pmd)) {
- printk("move_one_page: bad source pmd (%08lx)\n", pmd_val(*pmd));
+ pmd_ERROR(*pmd);
pmd_clear(pmd);
goto end;
}
#include <linux/interrupt.h>
#include <linux/init.h>
#include <linux/pagemap.h>
-#include <linux/bigmem.h> /* export bigmem vars */
+#include <linux/highmem.h>
+#include <linux/bootmem.h>
#include <asm/dma.h>
#include <asm/uaccess.h> /* for copy_to/from_user */
#define NR_MEM_LISTS 10
#endif
-/* The start of this MUST match the start of "struct page" */
struct free_area_struct {
- struct page *next;
- struct page *prev;
+ struct list_head free_list;
unsigned int * map;
};
-#define memory_head(x) ((struct page *)(x))
-
-#ifdef CONFIG_BIGMEM
-#define BIGMEM_LISTS_OFFSET NR_MEM_LISTS
+#ifdef CONFIG_HIGHMEM
+#define HIGHMEM_LISTS_OFFSET NR_MEM_LISTS
static struct free_area_struct free_area[NR_MEM_LISTS*2];
#else
static struct free_area_struct free_area[NR_MEM_LISTS];
#endif
-static inline void init_mem_queue(struct free_area_struct * head)
-{
- head->next = memory_head(head);
- head->prev = memory_head(head);
-}
-
-static inline void add_mem_queue(struct free_area_struct * head, struct page * entry)
-{
- struct page * next = head->next;
-
- entry->prev = memory_head(head);
- entry->next = next;
- next->prev = entry;
- head->next = entry;
-}
-
-static inline void remove_mem_queue(struct page * entry)
-{
- struct page * next = entry->next;
- struct page * prev = entry->prev;
- next->prev = prev;
- prev->next = next;
-}
-
/*
* Free_page() adds the page to the free lists. This is optimized for
* fast normal cases (no error jumps taken normally).
*/
spinlock_t page_alloc_lock = SPIN_LOCK_UNLOCKED;
+#define memlist_init(x) INIT_LIST_HEAD(x)
+#define memlist_add_head list_add
+#define memlist_add_tail list_add_tail
+#define memlist_del list_del
+#define memlist_entry list_entry
+#define memlist_next(x) ((x)->next)
+#define memlist_prev(x) ((x)->prev)
+
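The memlist_* names are thin aliases over the generic list_head primitives, and memlist_entry() recovers the struct page from its embedded list_head exactly as list_entry() does. The pointer arithmetic behind it, self-contained (the two-field page structure is a stand-in, not the real struct page):

    #include <stddef.h>
    #include <stdio.h>

    struct list_head { struct list_head *next, *prev; };
    struct page { int index; struct list_head list; };

    /* the list_entry() trick: step back from the embedded member
     * to the start of the containing structure */
    #define memlist_entry(ptr, type, member) \
        ((type *)((char *)(ptr) - offsetof(type, member)))

    int main(void)
    {
        struct page pg = { 42, { NULL, NULL } };
        struct list_head *curr = &pg.list;

        struct page *back = memlist_entry(curr, struct page, list);
        printf("recovered page index %d\n", back->index);
        return 0;
    }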
static inline void free_pages_ok(unsigned long map_nr, unsigned long order)
{
struct free_area_struct *area = free_area + order;
unsigned long index = map_nr >> (1 + order);
unsigned long mask = (~0UL) << order;
unsigned long flags;
+ struct page *page, *buddy;
spin_lock_irqsave(&page_alloc_lock, flags);
#define list(x) (mem_map+(x))
-#ifdef CONFIG_BIGMEM
- if (map_nr >= bigmem_mapnr) {
- area += BIGMEM_LISTS_OFFSET;
- nr_free_bigpages -= mask;
+#ifdef CONFIG_HIGHMEM
+ if (map_nr >= highmem_mapnr) {
+ area += HIGHMEM_LISTS_OFFSET;
+ nr_free_highpages -= mask;
}
#endif
map_nr &= mask;
nr_free_pages -= mask;
+
while (mask + (1 << (NR_MEM_LISTS-1))) {
if (!test_and_change_bit(index, area->map))
+ /*
+ * the buddy page is still allocated.
+ */
break;
- remove_mem_queue(list(map_nr ^ -mask));
+ /*
+ * Move the buddy up one level.
+ */
+ buddy = list(map_nr ^ -mask);
+ page = list(map_nr);
+
+ memlist_del(&buddy->list);
mask <<= 1;
area++;
index >>= 1;
map_nr &= mask;
}
- add_mem_queue(area, list(map_nr));
-
+ memlist_add_head(&(list(map_nr))->list, &area->free_list);
#undef list
spin_unlock_irqrestore(&page_alloc_lock, flags);
}
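The coalescing loop finds a block's buddy with `map_nr ^ -mask`: since mask is `(~0UL) << order`, `-mask` equals `1UL << order`, so the XOR simply flips the one bit that distinguishes a block from its buddy at that order. A short sketch:

    #include <stdio.h>

    int main(void)
    {
        unsigned long map_nr = 24;               /* hypothetical page frame number */
        unsigned long order;

        for (order = 0; order < 4; order++) {
            unsigned long mask = (~0UL) << order;
            unsigned long nr = map_nr & mask;    /* start of the block */
            unsigned long buddy = nr ^ -mask;    /* -mask == 1UL << order */
            printf("order %lu: block %lu, buddy %lu\n", order, nr, buddy);
        }
        return 0;
    }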
+/*
+ * Some ugly macros to speed up __get_free_pages()..
+ */
+#define MARK_USED(index, order, area) \
+ change_bit((index) >> (1+(order)), (area)->map)
+#define CAN_DMA(x) (PageDMA(x))
+#define ADDRESS(x) (PAGE_OFFSET + ((x) << PAGE_SHIFT))
+
int __free_page(struct page *page)
{
if (!PageReserved(page) && put_page_testzero(page)) {
if (PageLocked(page))
PAGE_BUG(page);
- free_pages_ok(page - mem_map, 0);
+ free_pages_ok(page-mem_map, 0);
return 1;
}
return 0;
return 0;
}
-/*
- * Some ugly macros to speed up __get_free_pages()..
- */
-#define MARK_USED(index, order, area) \
- change_bit((index) >> (1+(order)), (area)->map)
-#define CAN_DMA(x) (PageDMA(x))
-#define ADDRESS(x) (PAGE_OFFSET + ((x) << PAGE_SHIFT))
+static inline unsigned long EXPAND (struct page *map, unsigned long index,
+ int low, int high, struct free_area_struct * area)
+{
+ unsigned long size = 1 << high;
+
+ while (high > low) {
+ area--;
+ high--;
+ size >>= 1;
+ memlist_add_head(&(map)->list, &(area)->free_list);
+ MARK_USED(index, high, area);
+ index += size;
+ map += size;
+ }
+ set_page_count(map, 1);
+ return index;
+}
+
+static inline struct page * rmqueue (int order, int gfp_mask, int offset)
+{
+ struct free_area_struct * area = free_area+order+offset;
+ unsigned long curr_order = order, map_nr;
+ struct page *page;
+ struct list_head *head, *curr;
+
+ do {
+ head = &area->free_list;
+ curr = memlist_next(head);
+
+ while (curr != head) {
+ page = memlist_entry(curr, struct page, list);
+ if (!(gfp_mask & __GFP_DMA) || CAN_DMA(page)) {
+ memlist_del(curr);
+ map_nr = page - mem_map;
+ MARK_USED(map_nr, curr_order, area);
+ nr_free_pages -= 1 << order;
+ map_nr = EXPAND(page, map_nr, order, curr_order, area);
+ page = mem_map + map_nr;
+ return page;
+ }
+ curr = memlist_next(curr);
+ }
+ curr_order++;
+ area++;
+ } while (curr_order < NR_MEM_LISTS);
-#ifdef CONFIG_BIGMEM
-#define RMQUEUEBIG(order, gfp_mask) \
-if (gfp_mask & __GFP_BIGMEM) { \
- struct free_area_struct * area = free_area+order+BIGMEM_LISTS_OFFSET; \
- unsigned long new_order = order; \
- do { struct page *prev = memory_head(area), *ret = prev->next; \
- if (memory_head(area) != ret) { \
- unsigned long map_nr; \
- (prev->next = ret->next)->prev = prev; \
- map_nr = ret - mem_map; \
- MARK_USED(map_nr, new_order, area); \
- nr_free_pages -= 1 << order; \
- nr_free_bigpages -= 1 << order; \
- EXPAND(ret, map_nr, order, new_order, area); \
- spin_unlock_irqrestore(&page_alloc_lock, flags); \
- return ADDRESS(map_nr); \
- } \
- new_order++; area++; \
- } while (new_order < NR_MEM_LISTS); \
+ return NULL;
}
+
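rmqueue() searches upward from the requested order, and EXPAND() returns the unused halves of an oversized block to the lower free lists. The splitting arithmetic for an order-0 request served from an order-3 block, with illustrative page numbers:

    #include <stdio.h>

    int main(void)
    {
        unsigned long index = 64;        /* start of a free order-3 block */
        int low = 0, high = 3;
        unsigned long size = 1UL << high;

        while (high > low) {
            high--;
            size >>= 1;
            /* the front half goes back on the order-'high' free list;
             * the back half keeps being split */
            printf("freed order-%d block: pages %lu..%lu\n",
                   high, index, index + size - 1);
            index += size;
        }
        printf("allocated page %lu\n", index);   /* page 71 */
        return 0;
    }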
+static inline int balance_lowmemory (int gfp_mask)
+{
+ int freed;
+ static int low_on_memory = 0;
+
+#ifndef CONFIG_HIGHMEM
+ if (nr_free_pages > freepages.min) {
+ if (!low_on_memory)
+ return 1;
+ if (nr_free_pages >= freepages.high) {
+ low_on_memory = 0;
+ return 1;
+ }
+ }
+
+ low_on_memory = 1;
+#else
+ static int low_on_highmemory = 0;
+
+ if (gfp_mask & __GFP_HIGHMEM) {
+ if (nr_free_pages > freepages.min) {
+ if (!low_on_highmemory) {
+ return 1;
+ }
+ if (nr_free_pages >= freepages.high) {
+ low_on_highmemory = 0;
+ return 1;
+ }
+ }
+ low_on_highmemory = 1;
+ } else {
+ if (nr_free_pages+nr_free_highpages > freepages.min) {
+ if (!low_on_memory) {
+ return 1;
+ }
+ if (nr_free_pages+nr_free_highpages >= freepages.high) {
+ low_on_memory = 0;
+ return 1;
+ }
+ }
+ low_on_memory = 1;
+ }
#endif
+ current->flags |= PF_MEMALLOC;
+ freed = try_to_free_pages(gfp_mask);
+ current->flags &= ~PF_MEMALLOC;
-#define RMQUEUE(order, gfp_mask) \
-do { struct free_area_struct * area = free_area+order; \
- unsigned long new_order = order; \
- do { struct page *prev = memory_head(area), *ret = prev->next; \
- while (memory_head(area) != ret) { \
- if (!(gfp_mask & __GFP_DMA) || CAN_DMA(ret)) { \
- unsigned long map_nr; \
- (prev->next = ret->next)->prev = prev; \
- map_nr = ret - mem_map; \
- MARK_USED(map_nr, new_order, area); \
- nr_free_pages -= 1 << order; \
- EXPAND(ret, map_nr, order, new_order, area); \
- spin_unlock_irqrestore(&page_alloc_lock,flags);\
- return ADDRESS(map_nr); \
- } \
- prev = ret; \
- ret = ret->next; \
- } \
- new_order++; area++; \
- } while (new_order < NR_MEM_LISTS); \
-} while (0)
-
-#define EXPAND(map,index,low,high,area) \
-do { unsigned long size = 1 << high; \
- while (high > low) { \
- area--; high--; size >>= 1; \
- add_mem_queue(area, map); \
- MARK_USED(index, high, area); \
- index += size; \
- map += size; \
- } \
- set_page_count(map, 1); \
-} while (0)
+ if (!freed && !(gfp_mask & (__GFP_MED | __GFP_HIGH)))
+ return 0;
+ return 1;
+}
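balance_lowmemory() keeps a sticky low_on_memory flag so that once free memory drops below the min watermark, allocations keep forcing reclaim until the pool recovers all the way past the high watermark, not merely past min. The single-list case in miniature (the watermark values are made up):

    #include <stdio.h>

    static int low_on_memory = 0;

    /* returns 1 if the allocation may proceed, 0 if the caller
     * should try to free pages first */
    static int ok_to_allocate(int free_pages, int min, int high)
    {
        if (free_pages > min) {
            if (!low_on_memory)
                return 1;
            if (free_pages >= high) {
                low_on_memory = 0;
                return 1;
            }
        }
        low_on_memory = 1;
        return 0;
    }

    int main(void)
    {
        int samples[] = { 300, 90, 150, 260, 300 };
        int i;

        for (i = 0; i < 5; i++)
            printf("free=%d -> %s\n", samples[i],
                   ok_to_allocate(samples[i], 100, 256)
                   ? "allocate" : "reclaim");
        return 0;
    }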
-unsigned long __get_free_pages(int gfp_mask, unsigned long order)
+struct page * __get_pages(int gfp_mask, unsigned long order)
{
unsigned long flags;
+ struct page *page;
if (order >= NR_MEM_LISTS)
goto nopage;
-#ifdef ATOMIC_MEMORY_DEBUGGING
- if ((gfp_mask & __GFP_WAIT) && in_interrupt()) {
- static int count = 0;
- if (++count < 5) {
- printk("gfp called nonatomically from interrupt %p\n",
- __builtin_return_address(0));
- }
- goto nopage;
- }
-#endif
+ /*
+ * If anyone calls gfp from interrupts nonatomically then it
+ * will sooner or later be tripped up by a schedule().
+ */
/*
* If this is a recursive call, we'd better
* do our best to just allocate things without
* further thought.
*/
- if (!(current->flags & PF_MEMALLOC)) {
- int freed;
- static int low_on_memory = 0;
+ if (!(current->flags & PF_MEMALLOC))
+ goto lowmemory;
-#ifndef CONFIG_BIGMEM
- if (nr_free_pages > freepages.min) {
- if (!low_on_memory)
- goto ok_to_allocate;
- if (nr_free_pages >= freepages.high) {
- low_on_memory = 0;
- goto ok_to_allocate;
- }
- }
+ok_to_allocate:
+ spin_lock_irqsave(&page_alloc_lock, flags);
- low_on_memory = 1;
-#else
- static int low_on_bigmemory = 0;
-
- if (gfp_mask & __GFP_BIGMEM)
- {
- if (nr_free_pages > freepages.min) {
- if (!low_on_bigmemory)
- goto ok_to_allocate;
- if (nr_free_pages >= freepages.high) {
- low_on_bigmemory = 0;
- goto ok_to_allocate;
- }
- }
- low_on_bigmemory = 1;
- } else {
- if (nr_free_pages-nr_free_bigpages > freepages.min) {
- if (!low_on_memory)
- goto ok_to_allocate;
- if (nr_free_pages-nr_free_bigpages >= freepages.high) {
- low_on_memory = 0;
- goto ok_to_allocate;
- }
- }
- low_on_memory = 1;
+#ifdef CONFIG_HIGHMEM
+ if (gfp_mask & __GFP_HIGHMEM) {
+ page = rmqueue(order, gfp_mask, HIGHMEM_LISTS_OFFSET);
+ if (page) {
+ nr_free_highpages -= 1 << order;
+ spin_unlock_irqrestore(&page_alloc_lock, flags);
+ goto ret;
}
-#endif
- current->flags |= PF_MEMALLOC;
- freed = try_to_free_pages(gfp_mask);
- current->flags &= ~PF_MEMALLOC;
-
- if (!freed && !(gfp_mask & (__GFP_MED | __GFP_HIGH)))
- goto nopage;
}
-ok_to_allocate:
- spin_lock_irqsave(&page_alloc_lock, flags);
-#ifdef CONFIG_BIGMEM
- RMQUEUEBIG(order, gfp_mask);
#endif
- RMQUEUE(order, gfp_mask);
+ page = rmqueue(order, gfp_mask, 0);
spin_unlock_irqrestore(&page_alloc_lock, flags);
+ if (page)
+ goto ret;
/*
* If we can schedule, do so, and make sure to yield.
}
nopage:
- return 0;
+ return NULL;
+
+lowmemory:
+ if (balance_lowmemory(gfp_mask))
+ goto ok_to_allocate;
+ goto nopage;
+ret:
+ return page;
+}
+
+unsigned long __get_free_pages(int gfp_mask, unsigned long order)
+{
+ struct page *page;
+ page = __get_pages(gfp_mask, order);
+ if (!page)
+ return 0;
+ return page_address(page);
+}
+
+struct page * get_free_highpage(int gfp_mask)
+{
+ return __get_pages(gfp_mask, 0);
}
/*
unsigned long order, flags;
unsigned long total = 0;
- printk("Free pages: %6dkB (%6dkB BigMem)\n ( ",
+ printk("Free pages: %6dkB (%6ldkB HighMem)\n ( ",
nr_free_pages<<(PAGE_SHIFT-10),
- nr_free_bigpages<<(PAGE_SHIFT-10));
+ nr_free_highpages<<(PAGE_SHIFT-10));
printk("Free: %d, lru_cache: %d (%d %d %d)\n",
nr_free_pages,
nr_lru_pages,
freepages.min,
freepages.low,
freepages.high);
+
spin_lock_irqsave(&page_alloc_lock, flags);
- for (order=0 ; order < NR_MEM_LISTS; order++) {
- struct page * tmp;
+ for (order = 0; order < NR_MEM_LISTS; order++) {
unsigned long nr = 0;
- for (tmp = free_area[order].next ; tmp != memory_head(free_area+order) ; tmp = tmp->next) {
- nr ++;
- }
-#ifdef CONFIG_BIGMEM
- for (tmp = free_area[BIGMEM_LISTS_OFFSET+order].next;
- tmp != memory_head(free_area+BIGMEM_LISTS_OFFSET+order);
- tmp = tmp->next) {
- nr ++;
+ struct list_head *head, *curr;
+ struct page *page;
+
+ head = &free_area[order].free_list;
+ for (curr = memlist_next(head); curr != head; curr = memlist_next(curr)) {
+ page = memlist_entry(curr, struct page, list);
+ nr++;
}
+#ifdef CONFIG_HIGHMEM
+ head = &free_area[order+HIGHMEM_LISTS_OFFSET].free_list;
+ for (curr = memlist_next(head); curr != head; curr = memlist_next(curr))
+ nr++;
#endif
total += nr * ((PAGE_SIZE>>10) << order);
printk("%lu*%lukB ", nr, (unsigned long)((PAGE_SIZE>>10) << order));
}
spin_unlock_irqrestore(&page_alloc_lock, flags);
+
printk("= %lukB)\n", total);
#ifdef SWAP_CACHE_INFO
show_swap_cache_info();
* - mark all memory queues empty
* - clear the memory bitmaps
*/
-unsigned long __init free_area_init(unsigned long start_mem, unsigned long end_mem)
+void __init free_area_init(unsigned long end_mem_pages)
{
mem_map_t * p;
- unsigned long mask = PAGE_MASK;
+ unsigned long mask = -1;
unsigned long i;
+ unsigned long map_size;
/*
* Select nr of pages we try to keep free for important stuff
* This is fairly arbitrary, but based on some behaviour
* analysis.
*/
- i = (end_mem - PAGE_OFFSET) >> (PAGE_SHIFT+7);
+ i = end_mem_pages >> 7;
if (i < 10)
i = 10;
if (i > 256)
freepages.min = i;
freepages.low = i * 2;
freepages.high = i * 3;
- mem_map = (mem_map_t *) LONG_ALIGN(start_mem);
- p = mem_map + MAP_NR(end_mem);
- start_mem = LONG_ALIGN((unsigned long) p);
- memset(mem_map, 0, start_mem - (unsigned long) mem_map);
- do {
- --p;
+
+ /*
+ * Most architectures just pick 'start_mem'. Some architectures
+ * (with lots of mem and discontiguous memory maps) have to search
+ * for a good area.
+ */
+ map_size = end_mem_pages*sizeof(struct page);
+ mem_map = (struct page *) alloc_bootmem(map_size);
+ memset(mem_map, 0, map_size);
+
+ /*
+ * Initially all pages are reserved - free ones are freed
+ * up by free_all_bootmem() once the early boot process is
+ * done.
+ */
+ for (p = mem_map; p < mem_map + end_mem_pages; p++) {
set_page_count(p, 0);
- p->flags = (1 << PG_DMA) | (1 << PG_reserved);
+ p->flags = (1 << PG_DMA);
+ SetPageReserved(p);
init_waitqueue_head(&p->wait);
- } while (p > mem_map);
-
+ memlist_init(&p->list);
+ }
+
for (i = 0 ; i < NR_MEM_LISTS ; i++) {
unsigned long bitmap_size;
- init_mem_queue(free_area+i);
-#ifdef CONFIG_BIGMEM
- init_mem_queue(free_area+BIGMEM_LISTS_OFFSET+i);
+ unsigned int * map;
+ memlist_init(&(free_area+i)->free_list);
+#ifdef CONFIG_HIGHMEM
+ memlist_init(&(free_area+HIGHMEM_LISTS_OFFSET+i)->free_list);
#endif
mask += mask;
- end_mem = (end_mem + ~mask) & mask;
- bitmap_size = (end_mem - PAGE_OFFSET) >> (PAGE_SHIFT + i);
+ end_mem_pages = (end_mem_pages + ~mask) & mask;
+ bitmap_size = end_mem_pages >> i;
bitmap_size = (bitmap_size + 7) >> 3;
bitmap_size = LONG_ALIGN(bitmap_size);
- free_area[i].map = (unsigned int *) start_mem;
- memset((void *) start_mem, 0, bitmap_size);
- start_mem += bitmap_size;
-#ifdef CONFIG_BIGMEM
- free_area[BIGMEM_LISTS_OFFSET+i].map = (unsigned int *) start_mem;
- memset((void *) start_mem, 0, bitmap_size);
- start_mem += bitmap_size;
+ map = (unsigned int *) alloc_bootmem(bitmap_size);
+ free_area[i].map = map;
+ memset((void *) map, 0, bitmap_size);
+#ifdef CONFIG_HIGHMEM
+ map = (unsigned int *) alloc_bootmem(bitmap_size);
+ free_area[HIGHMEM_LISTS_OFFSET+i].map = map;
+ memset((void *) map, 0, bitmap_size);
#endif
}
- return start_mem;
}
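The bitmap sizing in the loop above reserves one bit per possible block at each order, rounding the page count up to a multiple of the block size first. The arithmetic for a hypothetical 128MB machine (32768 4K pages):

    #include <stdio.h>

    #define LONG_ALIGN(x) (((x) + sizeof(long) - 1) & ~(sizeof(long) - 1))
    #define NR_MEM_LISTS 10

    int main(void)
    {
        unsigned long end_mem_pages = 32768;     /* 128MB in 4K pages */
        unsigned long mask = -1;
        int i;

        for (i = 0; i < NR_MEM_LISTS; i++) {
            unsigned long bitmap_size;

            mask += mask;                                /* ~0 << (i+1) */
            end_mem_pages = (end_mem_pages + ~mask) & mask;
            bitmap_size = end_mem_pages >> i;            /* bits */
            bitmap_size = LONG_ALIGN((bitmap_size + 7) >> 3);
            printf("order %d: %lu bytes of buddy bitmap\n", i, bitmap_size);
        }
        return 0;
    }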
* that shared pages stay shared while being swapped.
*/
-static int rw_swap_page_base(int rw, unsigned long entry, struct page *page, int wait)
+static int rw_swap_page_base(int rw, pte_t entry, struct page *page, int wait)
{
unsigned long type, offset;
struct swap_info_struct * p;
kdev_t dev = 0;
int block_size;
-#ifdef DEBUG_SWAP
- printk ("DebugVM: %s_swap_page entry %08lx, page %p (count %d), %s\n",
- (rw == READ) ? "read" : "write",
- entry, (char *) page_address(page), page_count(page),
- wait ? "wait" : "nowait");
-#endif
-
type = SWP_TYPE(entry);
if (type >= nr_swapfiles) {
printk("Internal error: bad swap-device\n");
return 0;
}
if (p->swap_map && !p->swap_map[offset]) {
- printk(KERN_ERR "rw_swap_page: "
- "Trying to %s unallocated swap (%08lx)\n",
- (rw == READ) ? "read" : "write", entry);
+ pte_ERROR(entry);
return 0;
}
if (!(p->flags & SWP_USED)) {
if (page_count(page) == 0)
printk(KERN_ERR "rw_swap_page: page unused while waiting!\n");
-#ifdef DEBUG_SWAP
- printk ("DebugVM: %s_swap_page finished on page %p (count %d)\n",
- (rw == READ) ? "read" : "write",
- (char *) page_address(page),
- page_count(page));
-#endif
return 1;
}
*/
void rw_swap_page(int rw, struct page *page, int wait)
{
- unsigned long entry = page->offset;
+ pte_t entry = get_pagecache_pte(page);
if (!PageLocked(page))
PAGE_BUG(page);
* Therefore we can't use it. Later when we can remove the need for the
* lock map and we can reduce the number of functions exported.
*/
-void rw_swap_page_nolock(int rw, unsigned long entry, char *buf, int wait)
+void rw_swap_page_nolock(int rw, pte_t entry, char *buf, int wait)
{
struct page *page = mem_map + MAP_NR(buf);
* slab an obj belongs to. With kmalloc(), and kfree(), these are used
* to find the cache which an obj belongs to.
*/
-#define SLAB_SET_PAGE_CACHE(pg, x) ((pg)->next = (struct page *)(x))
-#define SLAB_GET_PAGE_CACHE(pg) ((kmem_cache_t *)(pg)->next)
-#define SLAB_SET_PAGE_SLAB(pg, x) ((pg)->prev = (struct page *)(x))
-#define SLAB_GET_PAGE_SLAB(pg) ((kmem_slab_t *)(pg)->prev)
+#define SLAB_SET_PAGE_CACHE(pg,x) ((pg)->list.next = (struct list_head *)(x))
+#define SLAB_GET_PAGE_CACHE(pg) ((kmem_cache_t *)(pg)->list.next)
+#define SLAB_SET_PAGE_SLAB(pg,x) ((pg)->list.prev = (struct list_head *)(x))
+#define SLAB_GET_PAGE_SLAB(pg) ((kmem_slab_t *)(pg)->list.prev)
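With the explicit next/prev pointers replaced by a list_head, the slab keeps using the same trick these macros encode: a page the allocator has handed out sits on no free list, so its link fields are free to carry back-pointers to the owning cache and slab. A toy version (all structure layouts here are stand-ins):

    #include <stdio.h>

    struct list_head { struct list_head *next, *prev; };
    struct page { struct list_head list; };
    struct kmem_cache { const char *name; };

    #define SET_PAGE_CACHE(pg, x) ((pg)->list.next = (struct list_head *)(x))
    #define GET_PAGE_CACHE(pg)    ((struct kmem_cache *)(pg)->list.next)

    int main(void)
    {
        struct kmem_cache cachep = { "size-4096" };
        struct page pg;

        SET_PAGE_CACHE(&pg, &cachep);    /* page is in use, so the list
                                          * links are otherwise idle */
        printf("page belongs to cache %s\n", GET_PAGE_CACHE(&pg)->name);
        return 0;
    }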
/* Size description struct for general caches. */
typedef struct cache_sizes {
static unsigned long bufctl_limit = 0;
/* Initialisation - setup the `cache' cache. */
-long __init kmem_cache_init(long start, long end)
+void __init kmem_cache_init(void)
{
size_t size, i;
*/
if (num_physpages > (32 << 20) >> PAGE_SHIFT)
slab_break_gfp_order = SLAB_BREAK_GFP_ORDER_HI;
- return start;
}
/* Initialisation - setup remaining internal and general caches.
}
#endif
-void add_to_swap_cache(struct page *page, unsigned long entry)
+void add_to_swap_cache(struct page *page, pte_t entry)
{
#ifdef SWAP_CACHE_INFO
swap_cache_add_total++;
#endif
-#ifdef DEBUG_SWAP
- printk("DebugVM: add_to_swap_cache(%08lx count %d, entry %08lx)\n",
- page_address(page), page_count(page), entry);
-#endif
- if (PageTestandSetSwapCache(page)) {
- printk(KERN_ERR "swap_cache: replacing non-empty entry %08lx "
- "on page %08lx\n",
- page->offset, page_address(page));
- }
- if (page->inode) {
- printk(KERN_ERR "swap_cache: replacing page-cached entry "
- "on page %08lx\n", page_address(page));
- }
- add_to_page_cache(page, &swapper_inode, entry);
+ if (PageTestandSetSwapCache(page))
+ BUG();
+ if (page->inode)
+ BUG();
+ add_to_page_cache(page, &swapper_inode, pte_val(entry));
}
/*
* Note: if swap_map[] reaches SWAP_MAP_MAX the entries are treated as
* "permanent", but will be reclaimed by the next swapoff.
*/
-int swap_duplicate(unsigned long entry)
+int swap_duplicate(pte_t entry)
{
struct swap_info_struct * p;
unsigned long offset, type;
int result = 0;
- if (!entry)
+ if (!pte_val(entry))
goto out;
type = SWP_TYPE(entry);
if (type & SHM_SWP_TYPE)
else {
static int overflow = 0;
if (overflow++ < 5)
- printk(KERN_WARNING
- "swap_duplicate: entry %08lx map count=%d\n",
- entry, p->swap_map[offset]);
+ pte_ERROR(entry);
p->swap_map[offset] = SWAP_MAP_MAX;
}
result = 1;
-#ifdef DEBUG_SWAP
- printk("DebugVM: swap_duplicate(entry %08lx, count now %d)\n",
- entry, p->swap_map[offset]);
-#endif
out:
return result;
bad_file:
- printk(KERN_ERR
- "swap_duplicate: entry %08lx, nonexistent swap file\n", entry);
+ pte_ERROR(entry);
goto out;
bad_offset:
- printk(KERN_ERR
- "swap_duplicate: entry %08lx, offset exceeds max\n", entry);
+ pte_ERROR(entry);
goto out;
bad_unused:
- printk(KERN_ERR
- "swap_duplicate at %8p: entry %08lx, unused page\n",
- __builtin_return_address(0), entry);
+ pte_ERROR(entry);
goto out;
}
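swap_duplicate() and its callers now traffic in pte_t, but a swap entry is still a type and an offset packed into one pte-sized word, which is what SWP_TYPE() and SWP_OFFSET() unpack. The shift/mask layout below follows the classic i386 encoding and is an assumption for illustration only:

    #include <stdio.h>

    #define SWP_TYPE(e)     (((e) >> 1) & 0x3f)
    #define SWP_OFFSET(e)   ((e) >> 8)
    #define SWP_ENTRY(t, o) (((t) << 1) | ((o) << 8))

    int main(void)
    {
        unsigned long entry = SWP_ENTRY(2UL, 1234UL);

        printf("entry %08lx -> type %lu, offset %lu\n",
               entry, SWP_TYPE(entry), SWP_OFFSET(entry));
        return 0;
    }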
-int swap_count(unsigned long entry)
+int swap_count(struct page *page)
{
struct swap_info_struct * p;
unsigned long offset, type;
+ pte_t entry = get_pagecache_pte(page);
int retval = 0;
- if (!entry)
+ if (!pte_val(entry))
goto bad_entry;
type = SWP_TYPE(entry);
if (type & SHM_SWP_TYPE)
if (!p->swap_map[offset])
goto bad_unused;
retval = p->swap_map[offset];
-#ifdef DEBUG_SWAP
- printk("DebugVM: swap_count(entry %08lx, count %d)\n",
- entry, retval);
-#endif
out:
return retval;
printk(KERN_ERR "swap_count: null entry!\n");
goto out;
bad_file:
- printk(KERN_ERR
- "swap_count: entry %08lx, nonexistent swap file!\n", entry);
+ pte_ERROR(entry);
goto out;
bad_offset:
- printk(KERN_ERR
- "swap_count: entry %08lx, offset exceeds max!\n", entry);
+ pte_ERROR(entry);
goto out;
bad_unused:
- printk(KERN_ERR
- "swap_count at %8p: entry %08lx, unused page!\n",
- __builtin_return_address(0), entry);
+ pte_ERROR(entry);
goto out;
}
{
struct inode *inode = page->inode;
- if (!inode) {
- printk ("VM: Removing swap cache page with zero inode hash "
- "on page %08lx\n", page_address(page));
- return;
- }
- if (inode != &swapper_inode) {
- printk ("VM: Removing swap cache page with wrong inode hash "
- "on page %08lx\n", page_address(page));
- }
+ if (!inode)
+ BUG();
+ if (inode != &swapper_inode)
+ BUG();
if (!PageSwapCache(page))
PAGE_BUG(page);
-#ifdef DEBUG_SWAP
- printk("DebugVM: remove_from_swap_cache(%08lx count %d)\n",
- page_address(page), page_count(page));
-#endif
PageClearSwapCache(page);
remove_inode_page(page);
}
*/
void __delete_from_swap_cache(struct page *page)
{
- long entry = page->offset;
+ pte_t entry = get_pagecache_pte(page);
#ifdef SWAP_CACHE_INFO
swap_cache_del_total++;
#endif
-#ifdef DEBUG_SWAP
- printk("DebugVM: delete_from_swap_cache(%08lx count %d, "
- "entry %08lx)\n",
- page_address(page), page_count(page), entry);
-#endif
- remove_from_swap_cache (page);
+ remove_from_swap_cache(page);
lock_kernel();
- swap_free (entry);
+ swap_free(entry);
unlock_kernel();
}
* this page if it is the last user of the page.
*/
-void free_page_and_swap_cache(unsigned long addr)
+void free_page_and_swap_cache(struct page *page)
{
- struct page *page = mem_map + MAP_NR(addr);
-
/*
* If we are the only user, then free up the swap cache.
*/
* lock before returning.
*/
-struct page * lookup_swap_cache(unsigned long entry)
+struct page * lookup_swap_cache(pte_t entry)
{
struct page *found;
swap_cache_find_total++;
#endif
while (1) {
- found = find_lock_page(&swapper_inode, entry);
+ /*
+ * Right now the pagecache is 32-bit only.
+ */
+ found = find_lock_page(&swapper_inode, pte_val(entry));
if (!found)
return 0;
if (found->inode != &swapper_inode || !PageSwapCache(found))
* the swap entry is no longer in use.
*/
-struct page * read_swap_cache_async(unsigned long entry, int wait)
+struct page * read_swap_cache_async(pte_t entry, int wait)
{
struct page *found_page = 0, *new_page;
unsigned long new_page_addr;
-#ifdef DEBUG_SWAP
- printk("DebugVM: read_swap_cache_async entry %08lx%s\n",
- entry, wait ? ", wait" : "");
-#endif
/*
* Make sure the swap entry is still in use.
*/
*/
add_to_swap_cache(new_page, entry);
rw_swap_page(READ, new_page, wait);
-#ifdef DEBUG_SWAP
- printk("DebugVM: read_swap_cache_async created "
- "entry %08lx at %p\n",
- entry, (char *) page_address(new_page));
-#endif
return new_page;
out_free_page:
return 0;
}
-unsigned long get_swap_page(void)
+pte_t get_swap_page(void)
{
struct swap_info_struct * p;
- unsigned long offset, entry;
+ unsigned long offset;
+ pte_t entry = __pte(0);
int type, wrapped = 0;
type = swap_list.next;
if (type < 0)
- return 0;
+ goto out;
if (nr_swap_pages == 0)
- return 0;
+ goto out;
while (1) {
p = &swap_info[type];
} else {
swap_list.next = type;
}
- return entry;
+ goto out;
}
}
type = p->next;
type = swap_list.head;
wrapped = 1;
}
- } else if (type < 0) {
- return 0; /* out of swap space */
- }
+ } else if (type < 0)
+ goto out; /* out of swap space */
}
+out:
+ return entry;
}
-void swap_free(unsigned long entry)
+void swap_free(pte_t entry)
{
struct swap_info_struct * p;
unsigned long offset, type;
- if (!entry)
+ if (!pte_val(entry))
goto out;
type = SWP_TYPE(entry);
nr_swap_pages++;
}
}
-#ifdef DEBUG_SWAP
- printk("DebugVM: swap_free(entry %08lx, count now %d)\n",
- entry, p->swap_map[offset]);
-#endif
out:
return;
printk("swap_free: offset exceeds max\n");
goto out;
bad_free:
- printk("swap_free: swap-space map bad (entry %08lx)\n",entry);
+ pte_ERROR(entry);
goto out;
}
/* needs the big kernel lock */
-unsigned long acquire_swap_entry(struct page *page)
+pte_t acquire_swap_entry(struct page *page)
{
struct swap_info_struct * p;
unsigned long offset, type;
- unsigned long entry;
+ pte_t entry;
if (!test_bit(PG_swap_entry, &page->flags))
goto new_swap_entry;
/* We have the old entry in the page offset still */
- entry = page->offset;
- if (!entry)
+ if (!page->offset)
goto new_swap_entry;
+ entry = get_pagecache_pte(page);
type = SWP_TYPE(entry);
if (type & SHM_SWP_TYPE)
goto new_swap_entry;
* what to do if a write is requested later.
*/
static inline void unuse_pte(struct vm_area_struct * vma, unsigned long address,
- pte_t *dir, unsigned long entry, unsigned long page)
+ pte_t *dir, pte_t entry, struct page* page)
{
pte_t pte = *dir;
set_pte(dir, pte_mkdirty(pte));
return;
}
- if (pte_val(pte) != entry)
+ if (pte_val(pte) != pte_val(entry))
return;
set_pte(dir, pte_mkdirty(mk_pte(page, vma->vm_page_prot)));
swap_free(entry);
static inline void unuse_pmd(struct vm_area_struct * vma, pmd_t *dir,
unsigned long address, unsigned long size, unsigned long offset,
- unsigned long entry, unsigned long page)
+ pte_t entry, struct page* page)
{
pte_t * pte;
unsigned long end;
if (pmd_none(*dir))
return;
if (pmd_bad(*dir)) {
- printk("unuse_pmd: bad pmd (%08lx)\n", pmd_val(*dir));
+ pmd_ERROR(*dir);
pmd_clear(dir);
return;
}
unuse_pte(vma, offset+address-vma->vm_start, pte, entry, page);
address += PAGE_SIZE;
pte++;
- } while (address < end);
+ } while (address && (address < end));
}
static inline void unuse_pgd(struct vm_area_struct * vma, pgd_t *dir,
unsigned long address, unsigned long size,
- unsigned long entry, unsigned long page)
+ pte_t entry, struct page* page)
{
pmd_t * pmd;
unsigned long offset, end;
if (pgd_none(*dir))
return;
if (pgd_bad(*dir)) {
- printk("unuse_pgd: bad pgd (%08lx)\n", pgd_val(*dir));
+ pgd_ERROR(*dir);
pgd_clear(dir);
return;
}
end = address + size;
if (end > PGDIR_SIZE)
end = PGDIR_SIZE;
+ if (address >= end)
+ BUG();
do {
unuse_pmd(vma, pmd, address, end - address, offset, entry,
page);
address = (address + PMD_SIZE) & PMD_MASK;
pmd++;
- } while (address < end);
+ } while (address && (address < end));
}
static void unuse_vma(struct vm_area_struct * vma, pgd_t *pgdir,
- unsigned long entry, unsigned long page)
+ pte_t entry, struct page* page)
{
unsigned long start = vma->vm_start, end = vma->vm_end;
- while (start < end) {
+ if (start >= end)
+ BUG();
+ do {
unuse_pgd(vma, pgdir, start, end - start, entry, page);
start = (start + PGDIR_SIZE) & PGDIR_MASK;
pgdir++;
- }
+ } while (start && (start < end));
}
-static void unuse_process(struct mm_struct * mm, unsigned long entry,
- unsigned long page)
+static void unuse_process(struct mm_struct * mm,
+ pte_t entry, struct page* page)
{
struct vm_area_struct* vma;
{
struct swap_info_struct * si = &swap_info[type];
struct task_struct *p;
- struct page *page_map;
- unsigned long entry, page;
+ struct page *page;
+ pte_t entry;
int i;
while (1) {
/* Get a page for the entry, using the existing swap
cache page if there is one. Otherwise, get a clean
page and read the swap into it. */
- page_map = read_swap_cache(entry);
- if (!page_map) {
+ page = read_swap_cache(entry);
+ if (!page) {
/*
* Continue searching if the entry became unused.
*/
continue;
return -ENOMEM;
}
- page = page_address(page_map);
read_lock(&tasklist_lock);
for_each_task(p)
unuse_process(p->mm, entry, page);
shm_unuse(entry, page);
/* Now get rid of the extra reference to the temporary
page we've been using. */
- if (PageSwapCache(page_map))
- delete_from_swap_cache(page_map);
- __free_page(page_map);
+ if (PageSwapCache(page))
+ delete_from_swap_cache(page);
+ __free_page(page);
/*
* Check for and clear any overflowed swap map counts.
*/
if (si->swap_map[i] != 0) {
if (si->swap_map[i] != SWAP_MAP_MAX)
- printk(KERN_ERR
- "try_to_unuse: entry %08lx count=%d\n",
- entry, si->swap_map[i]);
+ pte_ERROR(entry);
si->swap_map[i] = 0;
nr_swap_pages++;
}
if (pmd_none(*pmd))
return;
if (pmd_bad(*pmd)) {
- printk("free_area_pte: bad pmd (%08lx)\n", pmd_val(*pmd));
+ pmd_ERROR(*pmd);
pmd_clear(pmd);
return;
}
end = address + size;
if (end > PMD_SIZE)
end = PMD_SIZE;
- while (address < end) {
+ do {
pte_t page = *pte;
pte_clear(pte);
address += PAGE_SIZE;
if (pte_none(page))
continue;
if (pte_present(page)) {
- free_page(pte_page(page));
+ __free_page(mem_map+pte_pagenr(page));
continue;
}
printk("Whee.. Swapped out page in kernel page table\n");
- }
+ } while (address < end);
}
static inline void free_area_pmd(pgd_t * dir, unsigned long address, unsigned long size)
if (pgd_none(*dir))
return;
if (pgd_bad(*dir)) {
- printk("free_area_pmd: bad pgd (%08lx)\n", pgd_val(*dir));
+ pgd_ERROR(*dir);
pgd_clear(dir);
return;
}
end = address + size;
if (end > PGDIR_SIZE)
end = PGDIR_SIZE;
- while (address < end) {
+ do {
free_area_pte(pmd, address, end - address);
address = (address + PMD_SIZE) & PMD_MASK;
pmd++;
- }
+ } while (address < end);
}
void vmfree_area_pages(unsigned long address, unsigned long size)
dir = pgd_offset_k(address);
flush_cache_all();
- while (address < end) {
+ do {
free_area_pmd(dir, address, end - address);
address = (address + PGDIR_SIZE) & PGDIR_MASK;
dir++;
- }
+ } while (address && (address < end));
flush_tlb_all();
}
end = address + size;
if (end > PMD_SIZE)
end = PMD_SIZE;
- while (address < end) {
- unsigned long page;
+ do {
+ struct page * page;
if (!pte_none(*pte))
printk("alloc_area_pte: page already exists\n");
- page = __get_free_page(GFP_KERNEL|GFP_BIGMEM);
+ page = get_free_highpage(GFP_KERNEL|__GFP_HIGHMEM);
if (!page)
return -ENOMEM;
set_pte(pte, mk_pte(page, PAGE_KERNEL));
address += PAGE_SIZE;
pte++;
- }
+ } while (address < end);
return 0;
}
end = address + size;
if (end > PGDIR_SIZE)
end = PGDIR_SIZE;
- while (address < end) {
+ do {
pte_t * pte = pte_alloc_kernel(pmd, address);
if (!pte)
return -ENOMEM;
return -ENOMEM;
address = (address + PMD_SIZE) & PMD_MASK;
pmd++;
- }
+ } while (address < end);
return 0;
}
dir = pgd_offset_k(address);
flush_cache_all();
- while (address < end) {
+ do {
pmd_t *pmd;
pgd_t olddir = *dir;
set_pgdir(address, *dir);
address = (address + PGDIR_SIZE) & PGDIR_MASK;
dir++;
- }
+ } while (address && (address < end));
flush_tlb_all();
return 0;
}
struct vm_struct *area;
size = PAGE_ALIGN(size);
- if (!size || size > (max_mapnr << PAGE_SHIFT))
+ if (!size || size > (max_mapnr << PAGE_SHIFT)) {
+ BUG();
return NULL;
+ }
area = get_vm_area(size);
- if (!area)
+ if (!area) {
+ BUG();
return NULL;
+ }
addr = area->addr;
if (vmalloc_area_pages(VMALLOC_VMADDR(addr), size)) {
vfree(addr);
+ BUG();
return NULL;
}
return addr;
#include <linux/smp_lock.h>
#include <linux/pagemap.h>
#include <linux/init.h>
-#include <linux/bigmem.h>
+#include <linux/highmem.h>
#include <asm/pgtable.h>
*/
static int try_to_swap_out(struct vm_area_struct* vma, unsigned long address, pte_t * page_table, int gfp_mask)
{
- pte_t pte;
- unsigned long entry;
- unsigned long page_addr;
+ pte_t pte, entry;
struct page * page;
pte = *page_table;
if (!pte_present(pte))
goto out_failed;
- page_addr = pte_page(pte);
- if (MAP_NR(page_addr) >= max_mapnr)
+ page = pte_page(pte);
+ if (page-mem_map >= max_mapnr)
goto out_failed;
- page = mem_map + MAP_NR(page_addr);
-
/* Don't look at this pte if it's been accessed recently. */
if (pte_young(pte)) {
/*
if (PageReserved(page)
|| PageLocked(page)
|| ((gfp_mask & __GFP_DMA) && !PageDMA(page))
- || (!(gfp_mask & __GFP_BIGMEM) && PageBIGMEM(page)))
+ || (!(gfp_mask & __GFP_HIGHMEM) && PageHighMem(page)))
goto out_failed;
/*
* memory, and we should just continue our scan.
*/
if (PageSwapCache(page)) {
- entry = page->offset;
+ entry = get_pagecache_pte(page);
swap_duplicate(entry);
- set_pte(page_table, __pte(entry));
+ set_pte(page_table, entry);
drop_pte:
vma->vm_mm->rss--;
flush_tlb_page(vma, address);
* page with that swap entry.
*/
entry = acquire_swap_entry(page);
- if (!entry)
+ if (!pte_val(entry))
goto out_failed; /* No swap space left */
- if (!(page = prepare_bigmem_swapout(page)))
+ if (!(page = prepare_highmem_swapout(page)))
goto out_swap_free;
vma->vm_mm->rss--;
- set_pte(page_table, __pte(entry));
+ set_pte(page_table, entry);
vmlist_access_unlock(vma->vm_mm);
flush_tlb_page(vma, address);
if (pmd_none(*dir))
return 0;
if (pmd_bad(*dir)) {
- printk("swap_out_pmd: bad pmd (%08lx)\n", pmd_val(*dir));
+ pmd_ERROR(*dir);
pmd_clear(dir);
return 0;
}
return result;
address += PAGE_SIZE;
pte++;
- } while (address < end);
+ } while (address && (address < end));
return 0;
}
if (pgd_none(*dir))
return 0;
if (pgd_bad(*dir)) {
- printk("swap_out_pgd: bad pgd (%08lx)\n", pgd_val(*dir));
+ pgd_ERROR(*dir);
pgd_clear(dir);
return 0;
}
pmd = pmd_offset(dir, address);
pgd_end = (address + PGDIR_SIZE) & PGDIR_MASK;
- if (end > pgd_end)
+ if (pgd_end && (end > pgd_end))
end = pgd_end;
do {
return result;
address = (address + PMD_SIZE) & PMD_MASK;
pmd++;
- } while (address < end);
+ } while (address && (address < end));
return 0;
}
pgdir = pgd_offset(vma->vm_mm, address);
end = vma->vm_end;
- while (address < end) {
+ if (address >= end)
+ BUG();
+ do {
int result = swap_out_pgd(vma, pgdir, address, end, gfp_mask);
if (result)
return result;
address = (address + PGDIR_SIZE) & PGDIR_MASK;
pgdir++;
- }
+ } while (address && (address < end));
return 0;
}
*/
do {
/* kswapd is critical to provide GFP_ATOMIC
- allocations (not GFP_BIGMEM ones). */
- if (nr_free_pages - nr_free_bigpages >= freepages.high)
+ allocations (not GFP_HIGHMEM ones). */
+ if (nr_free_pages - nr_free_highpages >= freepages.high)
break;
if (!do_try_to_free_pages(GFP_KSWAPD))