From 0c7791dbc6d086d121176e9b8c9a1ce7f3a25343 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Fri, 23 Nov 2007 15:29:00 -0500 Subject: [PATCH] Import 2.3.30pre7 --- Documentation/vm/locking | 32 +++++- Documentation/vm/numa | 41 +++++++ arch/arm/mm/small_page.c | 2 +- arch/i386/boot/setup.S | 2 +- arch/i386/kernel/mtrr.c | 19 ---- arch/ppc/kernel/ppc_htab.c | 19 ---- arch/sparc/mm/generic.c | 2 + arch/sparc64/mm/generic.c | 2 + drivers/ap1000/ringbuf.c | 19 ---- drivers/isdn/divert/divert_procfs.c | 16 --- drivers/pci/proc.c | 19 ---- drivers/pnp/isapnp_proc.c | 16 --- drivers/scsi/aic7xxx.c | 3 +- drivers/usb/proc_usb.c | 19 ---- drivers/zorro/proc.c | 19 ---- fs/adfs/dir.c | 2 - fs/adfs/file.c | 2 - fs/affs/dir.c | 2 - fs/affs/file.c | 4 - fs/affs/symlink.c | 2 - fs/autofs/dir.c | 2 - fs/autofs/root.c | 2 - fs/autofs/symlink.c | 2 - fs/bad_inode.c | 2 - fs/bfs/dir.c | 2 - fs/bfs/file.c | 2 - fs/buffer.c | 2 +- fs/coda/dir.c | 2 - fs/coda/file.c | 2 - fs/coda/pioctl.c | 2 - fs/coda/symlink.c | 2 - fs/devices.c | 4 - fs/devpts/root.c | 2 - fs/efs/dir.c | 2 - fs/efs/file.c | 2 - fs/efs/symlink.c | 2 - fs/exec.c | 1 + fs/ext2/dir.c | 2 - fs/ext2/file.c | 2 - fs/ext2/symlink.c | 2 - fs/fat/file.c | 2 - fs/fifo.c | 19 ---- fs/hfs/dir_cap.c | 6 - fs/hfs/dir_dbl.c | 2 - fs/hfs/dir_nat.c | 4 - fs/hfs/file.c | 2 - fs/hfs/file_cap.c | 2 - fs/hfs/file_hdr.c | 2 - fs/hpfs/inode.c | 6 - fs/isofs/dir.c | 2 - fs/isofs/file.c | 2 - fs/isofs/symlink.c | 2 - fs/minix/dir.c | 2 - fs/minix/file.c | 2 - fs/minix/symlink.c | 2 - fs/msdos/namei.c | 2 - fs/ncpfs/dir.c | 2 - fs/ncpfs/file.c | 2 - fs/ncpfs/symlink.c | 2 - fs/nfs/dir.c | 94 ++++++++-------- fs/nfs/file.c | 2 - fs/nfs/symlink.c | 121 ++++++++------------ fs/ntfs/fs.c | 6 - fs/open.c | 6 +- fs/pipe.c | 19 ---- fs/proc/base.c | 3 - fs/proc/kmsg.c | 19 ---- fs/proc/omirr.c | 19 ---- fs/proc/openpromfs.c | 40 ------- fs/proc/proc_devtree.c | 95 +--------------- fs/qnx4/dir.c | 2 - fs/qnx4/file.c | 6 +- fs/qnx4/symlinks.c | 2 
- fs/romfs/inode.c | 6 - fs/smbfs/cache.c | 149 +++++++++---------------- fs/smbfs/dir.c | 2 - fs/smbfs/file.c | 2 - fs/sysv/dir.c | 2 - fs/sysv/file.c | 2 - fs/sysv/symlink.c | 2 - fs/udf/dir.c | 2 - fs/udf/file.c | 4 - fs/udf/symlink.c | 34 +++--- fs/ufs/dir.c | 2 - fs/ufs/file.c | 2 - fs/ufs/symlink.c | 2 - fs/umsdos/dir.c | 2 - fs/umsdos/rdir.c | 1 - fs/umsdos/symlink.c | 2 - fs/vfat/namei.c | 2 - include/asm-alpha/pgtable.h | 1 - include/asm-arm/pgtable.h | 1 - include/asm-i386/pgtable.h | 1 - include/asm-ppc/pgtable.h | 1 - include/linux/bootmem.h | 27 +++++ include/linux/fs.h | 10 +- include/linux/highmem.h | 2 +- include/linux/mm.h | 116 ++++++------------- include/linux/mmzone.h | 115 +++++++++++++++++++ include/linux/pagemap.h | 1 - include/linux/swap.h | 1 + kernel/ksyms.c | 11 +- kernel/module.c | 18 ++- kernel/sysctl.c | 2 - mm/Makefile | 2 +- mm/bootmem.c | 165 +++++++++++++++++++--------- mm/filemap.c | 53 +++------ mm/memory.c | 36 ++++-- mm/mmap.c | 13 +-- mm/numa.c | 103 +++++++++++++++++ mm/page_alloc.c | 101 +++++++++++------ mm/swap.c | 10 +- mm/swap_state.c | 48 +++++--- mm/vmscan.c | 10 +- net/wanrouter/wanproc.c | 4 - 115 files changed, 820 insertions(+), 1032 deletions(-) create mode 100644 Documentation/vm/numa create mode 100644 include/linux/mmzone.h create mode 100644 mm/numa.c diff --git a/Documentation/vm/locking b/Documentation/vm/locking index 0e4e503ae7ac..54c8a6ce063c 100644 --- a/Documentation/vm/locking +++ b/Documentation/vm/locking @@ -75,7 +75,11 @@ having the vmlist protection in this case. The vmlist lock nests with the inode i_shared_lock and the kmem cache c_spinlock spinlocks. This is okay, since code that holds i_shared_lock never asks for memory, and the kmem code asks for pages after dropping -c_spinlock. +c_spinlock. The vmlist lock also nests with pagecache_lock and +pagemap_lru_lock spinlocks, and no code asks for memory with these locks +held. 
+ +The vmlist lock is grabbed while holding the kernel_lock spinning monitor. The vmlist lock can be a sleeping or spin lock. In either case, care must be taken that it is not held on entry to the driver methods, since @@ -106,3 +110,29 @@ from disk, and an unmap -> swap_free making the handle unused, the swap delete and readahead code grabs a temp reference on the swaphandle to prevent warning messages from swap_duplicate <- read_swap_cache_async. +Swap cache locking +------------------ +Pages are added into the swap cache with kernel_lock held, to make sure +that multiple pages are not being added (and hence lost) by associating +all of them with the same swaphandle. + +Pages are guaranteed not to be removed from the scache if the page is +"shared": ie, other processes hold reference on the page or the associated +swap handle. The only code that does not follow this rule is shrink_mmap, +which deletes pages from the swap cache if no process has a reference on +the page (multiple processes might have references on the corresponding +swap handle though). lookup_swap_cache() races with shrink_mmap, when +establishing a reference on a scache page, so, it must check whether the +page it located is still in the swapcache, or shrink_mmap deleted it. +(This race is due to the fact that shrink_mmap looks at the page ref +count with pagecache_lock, but then drops pagecache_lock before deleting +the page from the scache). + +do_wp_page and do_swap_page have MP races in them while trying to figure +out whether a page is "shared", by looking at the page_count + swap_count. +To preserve the sum of the counts, the page lock _must_ be acquired before +calling is_page_shared (else processes might switch their swap_count refs +to the page count refs, after the page count ref has been snapshotted). + +Swap device deletion code currently breaks all the scache assumptions, +since it grabs neither mmap_sem nor page_table_lock. 
diff --git a/Documentation/vm/numa b/Documentation/vm/numa new file mode 100644 index 000000000000..21a3442b77b9 --- /dev/null +++ b/Documentation/vm/numa @@ -0,0 +1,41 @@ +Started Nov 1999 by Kanoj Sarcar + +The intent of this file is to have an up-to-date, running commentary +from different people about NUMA specific code in the Linux vm. + +What is NUMA? It is an architecture where the memory access times +for different regions of memory from a given processor vary +according to the "distance" of the memory region from the processor. +Each region of memory to which access times are the same from any +cpu, is called a node. On such architectures, it is beneficial if +the kernel tries to minimize inter node communications. Schemes +for this range from kernel text and read-only data replication +across nodes, to trying to house all the data structures that +key components of the kernel need on memory on that node. + +Currently, all the numa support is to provide efficient handling +of widely discontiguous physical memory, so architectures which +are not NUMA but can have huge holes in the physical address space +can use the same code. All this code is bracketed by CONFIG_DISCONTIGMEM. + +The initial port includes NUMAizing the bootmem allocator code by +encapsulating all the pieces of information into a bootmem_data_t +structure. Node specific calls have been added to the allocator. +In theory, any platform which uses the bootmem allocator should +be able to put the bootmem and mem_map data structures anywhere +it deems best. + +Each node's page allocation data structures have also been encapsulated +into a pg_data_t. The bootmem_data_t is just one part of this. To +make the code look uniform between NUMA and regular UMA platforms, +UMA platforms have a statically allocated pg_data_t too (contig_page_data). +For the sake of uniformity, the variable "numnodes" is also defined +for all platforms. 
As we run benchmarks, we might decide to NUMAize +more variables like low_on_memory, nr_free_pages etc into the pg_data_t. + +The NUMA aware page allocation code currently tries to allocate pages +from different nodes in a round robin manner. This will be changed to +do concentric circle search, starting from current node, once the +NUMA port achieves more maturity. The call alloc_pages_node has been +added, so that drivers can make the call and not worry about whether +it is running on a NUMA or UMA platform. diff --git a/arch/arm/mm/small_page.c b/arch/arm/mm/small_page.c index bf187ea162fa..ee7f571a72c1 100644 --- a/arch/arm/mm/small_page.c +++ b/arch/arm/mm/small_page.c @@ -116,7 +116,7 @@ again: remove_page_from_queue(page); restore_flags(flags); - return __page_address(page) + (offset << order->shift); + return page_address(page) + (offset << order->shift); need_new_page: page = alloc_page(priority); diff --git a/arch/i386/boot/setup.S b/arch/i386/boot/setup.S index 53f847e8bbb9..cbfa7b3e8ab8 100644 --- a/arch/i386/boot/setup.S +++ b/arch/i386/boot/setup.S @@ -208,7 +208,7 @@ bad_sig: addw $SYSSEG, %bx movw %bx, %cs:start_sys_seg # Move rest of setup code/data to here - movw $4096, %di # four sectors loaded by LILO + movw $2048, %di # four sectors loaded by LILO subw %si, %si movw %cs, %ax # aka SETUPSEG movw %ax, %es diff --git a/arch/i386/kernel/mtrr.c b/arch/i386/kernel/mtrr.c index 57d9b8b457f4..1c1f6b74bb5b 100644 --- a/arch/i386/kernel/mtrr.c +++ b/arch/i386/kernel/mtrr.c @@ -1488,25 +1488,6 @@ static struct file_operations mtrr_fops = static struct inode_operations proc_mtrr_inode_operations = { &mtrr_fops, /* default property file-ops */ - NULL, /* create */ - NULL, /* lookup */ - NULL, /* link */ - NULL, /* unlink */ - NULL, /* symlink */ - NULL, /* mkdir */ - NULL, /* rmdir */ - NULL, /* mknod */ - NULL, /* rename */ - NULL, /* readlink */ - NULL, /* follow_link */ - NULL, /* get_block */ - NULL, /* readpage */ - NULL, /* writepage */ - NULL, /* 
flushpage */ - NULL, /* truncate */ - NULL, /* permission */ - NULL, /* smap */ - NULL /* revalidate */ }; static struct proc_dir_entry *proc_root_mtrr; diff --git a/arch/ppc/kernel/ppc_htab.c b/arch/ppc/kernel/ppc_htab.c index 0b7c77683a74..b90fa7a2c631 100644 --- a/arch/ppc/kernel/ppc_htab.c +++ b/arch/ppc/kernel/ppc_htab.c @@ -63,25 +63,6 @@ static struct file_operations ppc_htab_operations = { */ struct inode_operations proc_ppc_htab_inode_operations = { &ppc_htab_operations, /* default proc file-ops */ - NULL, /* create */ - NULL, /* lookup */ - NULL, /* link */ - NULL, /* unlink */ - NULL, /* symlink */ - NULL, /* mkdir */ - NULL, /* rmdir */ - NULL, /* mknod */ - NULL, /* rename */ - NULL, /* readlink */ - NULL, /* follow_link */ - NULL, /* get_block */ - NULL, /* readpage */ - NULL, /* writepage */ - NULL, /* flushpage */ - NULL, /* truncate */ - NULL, /* permission */ - NULL, /* smap */ - NULL /* revalidate */ }; /* these will go into processor.h when I'm done debugging -- Cort */ diff --git a/arch/sparc/mm/generic.c b/arch/sparc/mm/generic.c index ea94a8f60aaf..9669f5111b44 100644 --- a/arch/sparc/mm/generic.c +++ b/arch/sparc/mm/generic.c @@ -91,7 +91,9 @@ static inline int io_remap_pmd_range(pmd_t * pmd, unsigned long address, unsigne pte_t * pte = pte_alloc(pmd, address); if (!pte) return -ENOMEM; + spin_lock(&current->mm->page_table_lock); io_remap_pte_range(pte, address, end - address, address + offset, prot, space); + spin_unlock(&current->mm->page_table_lock); address = (address + PMD_SIZE) & PMD_MASK; pmd++; } while (address < end); diff --git a/arch/sparc64/mm/generic.c b/arch/sparc64/mm/generic.c index cf94f42501ac..be999f446293 100644 --- a/arch/sparc64/mm/generic.c +++ b/arch/sparc64/mm/generic.c @@ -127,7 +127,9 @@ static inline int io_remap_pmd_range(pmd_t * pmd, unsigned long address, unsigne pte_t * pte = pte_alloc(pmd, address); if (!pte) return -ENOMEM; + spin_lock(&current->mm->page_table_lock); io_remap_pte_range(pte, address, end - address, address + 
offset, prot, space); + spin_unlock(&current->mm->page_table_lock); address = (address + PMD_SIZE) & PMD_MASK; pmd++; } while (address < end); diff --git a/drivers/ap1000/ringbuf.c b/drivers/ap1000/ringbuf.c index 49c71673220d..8acb617b6940 100644 --- a/drivers/ap1000/ringbuf.c +++ b/drivers/ap1000/ringbuf.c @@ -308,23 +308,4 @@ static struct file_operations proc_ringbuf_operations = { struct inode_operations proc_ringbuf_inode_operations = { &proc_ringbuf_operations, /* default base directory file-ops */ - NULL, /* create */ - NULL, /* lookup */ - NULL, /* link */ - NULL, /* unlink */ - NULL, /* symlink */ - NULL, /* mkdir */ - NULL, /* rmdir */ - NULL, /* mknod */ - NULL, /* rename */ - NULL, /* readlink */ - NULL, /* follow_link */ - NULL, /* get_block */ - NULL, /* readpage */ - NULL, /* writepage */ - NULL, /* flushpage */ - NULL, /* truncate */ - NULL, /* permission */ - NULL, /* smap */ - NULL /* revalidate */ }; diff --git a/drivers/isdn/divert/divert_procfs.c b/drivers/isdn/divert/divert_procfs.c index 40bbce8be757..09f148b5a870 100644 --- a/drivers/isdn/divert/divert_procfs.c +++ b/drivers/isdn/divert/divert_procfs.c @@ -294,22 +294,6 @@ static struct file_operations isdn_fops = struct inode_operations divert_file_inode_operations = { &isdn_fops, /* default proc file-ops */ - NULL, /* create */ - NULL, /* lookup */ - NULL, /* link */ - NULL, /* unlink */ - NULL, /* symlink */ - NULL, /* mkdir */ - NULL, /* rmdir */ - NULL, /* mknod */ - NULL, /* rename */ - NULL, /* readlink */ - NULL, /* follow_link */ - NULL, /* readpage */ - NULL, /* writepage */ - NULL, /* bmap */ - NULL, /* truncate */ - NULL /* permission */ }; diff --git a/drivers/pci/proc.c b/drivers/pci/proc.c index 6a297fb66b04..40c006f4f4b6 100644 --- a/drivers/pci/proc.c +++ b/drivers/pci/proc.c @@ -207,25 +207,6 @@ static struct file_operations proc_bus_pci_operations = { static struct inode_operations proc_bus_pci_inode_operations = { &proc_bus_pci_operations, /* default base directory file-ops */ 
- NULL, /* create */ - NULL, /* lookup */ - NULL, /* link */ - NULL, /* unlink */ - NULL, /* symlink */ - NULL, /* mkdir */ - NULL, /* rmdir */ - NULL, /* mknod */ - NULL, /* rename */ - NULL, /* readlink */ - NULL, /* follow_link */ - NULL, /* get_block */ - NULL, /* readpage */ - NULL, /* writepage */ - NULL, /* flushpage */ - NULL, /* truncate */ - NULL, /* permission */ - NULL, /* smap */ - NULL /* revalidate */ }; #if BITS_PER_LONG == 32 diff --git a/drivers/pnp/isapnp_proc.c b/drivers/pnp/isapnp_proc.c index 19f3d0a8f411..f78e3488bf4c 100644 --- a/drivers/pnp/isapnp_proc.c +++ b/drivers/pnp/isapnp_proc.c @@ -212,22 +212,6 @@ static struct file_operations isapnp_info_entry_operations = static struct inode_operations isapnp_info_entry_inode_operations = { &isapnp_info_entry_operations, /* default sound info directory file-ops */ - NULL, /* create */ - NULL, /* lookup */ - NULL, /* link */ - NULL, /* unlink */ - NULL, /* symlink */ - NULL, /* mkdir */ - NULL, /* rmdir */ - NULL, /* mknod */ - NULL, /* rename */ - NULL, /* readlink */ - NULL, /* follow_link */ - NULL, /* readpage */ - NULL, /* writepage */ - NULL, /* bmap */ - NULL, /* truncate */ - NULL /* permission */ }; static int __init isapnp_proc_init(void) diff --git a/drivers/scsi/aic7xxx.c b/drivers/scsi/aic7xxx.c index 2c74bad30163..9cbbd2cde68f 100644 --- a/drivers/scsi/aic7xxx.c +++ b/drivers/scsi/aic7xxx.c @@ -1500,7 +1500,7 @@ aic_outb(struct aic7xxx_host *p, unsigned char val, long port) * to a parameter with a ':' between the parameter and the value. * ie. 
aic7xxx=unpause:0x0A,extended *-F*************************************************************************/ -static void +static int aic7xxx_setup(char *s) { int i, n; @@ -1638,6 +1638,7 @@ aic7xxx_setup(char *s) } } } + return 1; } __setup("aic7xxx=", aic7xxx_setup); diff --git a/drivers/usb/proc_usb.c b/drivers/usb/proc_usb.c index b2a6ff6ff727..c0be82bc9ca9 100644 --- a/drivers/usb/proc_usb.c +++ b/drivers/usb/proc_usb.c @@ -949,25 +949,6 @@ static struct file_operations proc_usb_device_file_operations = { static struct inode_operations proc_usb_device_inode_operations = { &proc_usb_device_file_operations, /* file-ops */ - NULL, /* create */ - NULL, /* lookup */ - NULL, /* link */ - NULL, /* unlink */ - NULL, /* symlink */ - NULL, /* mkdir */ - NULL, /* rmdir */ - NULL, /* mknod */ - NULL, /* rename */ - NULL, /* readlink */ - NULL, /* follow_link */ - NULL, /* get_block */ - NULL, /* readpage */ - NULL, /* writepage */ - NULL, /* flushpage */ - NULL, /* truncate */ - NULL, /* permission */ - NULL, /* smap */ - NULL /* revalidate */ }; void proc_usb_add_bus(struct usb_bus *bus) diff --git a/drivers/zorro/proc.c b/drivers/zorro/proc.c index 29c4bb4af600..cf981cf3b06f 100644 --- a/drivers/zorro/proc.c +++ b/drivers/zorro/proc.c @@ -78,25 +78,6 @@ static struct file_operations proc_bus_zorro_operations = { static struct inode_operations proc_bus_zorro_inode_operations = { &proc_bus_zorro_operations, /* default base directory file-ops */ - NULL, /* create */ - NULL, /* lookup */ - NULL, /* link */ - NULL, /* unlink */ - NULL, /* symlink */ - NULL, /* mkdir */ - NULL, /* rmdir */ - NULL, /* mknod */ - NULL, /* rename */ - NULL, /* readlink */ - NULL, /* follow_link */ - NULL, /* get_block */ - NULL, /* readpage */ - NULL, /* writepage */ - NULL, /* flushpage */ - NULL, /* truncate */ - NULL, /* permission */ - NULL, /* smap */ - NULL /* revalidate */ }; static int diff --git a/fs/adfs/dir.c b/fs/adfs/dir.c index 0d0d11a06f16..f3eb4e872265 100644 --- a/fs/adfs/dir.c 
+++ b/fs/adfs/dir.c @@ -54,10 +54,8 @@ struct inode_operations adfs_dir_inode_operations = { NULL, /* get_block */ NULL, /* read page */ NULL, /* write page */ - NULL, /* flush page */ NULL, /* truncate */ NULL, /* permission */ - NULL, /* smap */ NULL /* revalidate */ }; diff --git a/fs/adfs/file.c b/fs/adfs/file.c index c415597e3b98..2881e81f1f39 100644 --- a/fs/adfs/file.c +++ b/fs/adfs/file.c @@ -64,9 +64,7 @@ struct inode_operations adfs_file_inode_operations = { adfs_bmap, /* get_block */ block_read_full_page, /* readpage */ NULL, /* writepage */ - NULL, /* flushpage */ NULL, /* truncate */ NULL, /* permission */ - NULL, /* smap */ NULL /* revalidate */ }; diff --git a/fs/affs/dir.c b/fs/affs/dir.c index b88eac7706d2..f126dcbb8030 100644 --- a/fs/affs/dir.c +++ b/fs/affs/dir.c @@ -60,10 +60,8 @@ struct inode_operations affs_dir_inode_operations = { NULL, /* get_block */ NULL, /* readpage */ NULL, /* writepage */ - NULL, /* flushpage */ NULL, /* truncate */ NULL, /* permissions */ - NULL, /* smap */ NULL /* revalidate */ }; diff --git a/fs/affs/file.c b/fs/affs/file.c index 358c1c56cccd..e32b9c344c68 100644 --- a/fs/affs/file.c +++ b/fs/affs/file.c @@ -77,10 +77,8 @@ struct inode_operations affs_file_inode_operations = { affs_bmap, /* get_block */ block_read_full_page, /* readpage */ NULL, /* writepage */ - NULL, /* flushpage */ affs_truncate, /* truncate */ NULL, /* permission */ - NULL, /* smap */ NULL /* revalidate */ }; @@ -118,10 +116,8 @@ struct inode_operations affs_file_inode_operations_ofs = { NULL, /* get_block */ NULL, /* readpage */ NULL, /* writepage */ - NULL, /* flushpage */ affs_truncate, /* truncate */ NULL, /* permission */ - NULL, /* smap */ NULL /* revalidate */ }; diff --git a/fs/affs/symlink.c b/fs/affs/symlink.c index 18c03730f478..c979134b4a84 100644 --- a/fs/affs/symlink.c +++ b/fs/affs/symlink.c @@ -38,10 +38,8 @@ struct inode_operations affs_symlink_inode_operations = { NULL, /* get_block */ NULL, /* readpage */ NULL, /* writepage */ 
- NULL, /* flushpage */ NULL, /* truncate */ NULL, /* permission */ - NULL, /* smap */ NULL /* revalidate */ }; diff --git a/fs/autofs/dir.c b/fs/autofs/dir.c index a8262e7018c9..018b6787b7f4 100644 --- a/fs/autofs/dir.c +++ b/fs/autofs/dir.c @@ -76,10 +76,8 @@ struct inode_operations autofs_dir_inode_operations = { NULL, /* get_block */ NULL, /* readpage */ NULL, /* writepage */ - NULL, /* flushpage */ NULL, /* truncate */ NULL, /* permission */ - NULL, /* smap */ NULL /* revalidate */ }; diff --git a/fs/autofs/root.c b/fs/autofs/root.c index c6034c7548bb..4f0569ca736e 100644 --- a/fs/autofs/root.c +++ b/fs/autofs/root.c @@ -57,10 +57,8 @@ struct inode_operations autofs_root_inode_operations = { NULL, /* get_block */ NULL, /* readpage */ NULL, /* writepage */ - NULL, /* flushpage */ NULL, /* truncate */ NULL, /* permission */ - NULL, /* smap */ NULL /* revalidate */ }; diff --git a/fs/autofs/symlink.c b/fs/autofs/symlink.c index 6ca720527314..be59d3e65eb7 100644 --- a/fs/autofs/symlink.c +++ b/fs/autofs/symlink.c @@ -52,9 +52,7 @@ struct inode_operations autofs_symlink_inode_operations = { NULL, /* get_block */ NULL, /* readpage */ NULL, /* writepage */ - NULL, /* flushpage */ NULL, /* truncate */ NULL, /* permission */ - NULL, /* smap */ NULL /* revalidate */ }; diff --git a/fs/bad_inode.c b/fs/bad_inode.c index 2691953f2d0e..4369e2a86e95 100644 --- a/fs/bad_inode.c +++ b/fs/bad_inode.c @@ -63,10 +63,8 @@ struct inode_operations bad_inode_ops = EIO_ERROR, /* get_block */ EIO_ERROR, /* readpage */ EIO_ERROR, /* writepage */ - EIO_ERROR, /* flushpage */ EIO_ERROR, /* truncate */ EIO_ERROR, /* permission */ - EIO_ERROR, /* smap */ EIO_ERROR /* revalidate */ }; diff --git a/fs/bfs/dir.c b/fs/bfs/dir.c index 8aa2004514c9..6f5c240d2ac0 100644 --- a/fs/bfs/dir.c +++ b/fs/bfs/dir.c @@ -283,10 +283,8 @@ struct inode_operations bfs_dir_inops = { get_block: NULL, readpage: NULL, writepage: NULL, - flushpage: NULL, truncate: NULL, permission: NULL, - smap: NULL, revalidate: 
NULL }; diff --git a/fs/bfs/file.c b/fs/bfs/file.c index 4bc1ed99a5d4..856ed33cd16d 100644 --- a/fs/bfs/file.c +++ b/fs/bfs/file.c @@ -68,9 +68,7 @@ struct inode_operations bfs_file_inops = { get_block: bfs_get_block, readpage: block_read_full_page, writepage: block_write_full_page, - flushpage: block_flushpage, truncate: NULL, permission: NULL, - smap: NULL, revalidate: NULL }; diff --git a/fs/buffer.c b/fs/buffer.c index 910e29db3abd..bf3da4a4d836 100644 --- a/fs/buffer.c +++ b/fs/buffer.c @@ -1236,7 +1236,7 @@ static void unmap_buffer(struct buffer_head * bh) * we have truncated the file and are going to free the * blocks on-disk.. */ -int block_flushpage(struct inode *inode, struct page *page, unsigned long offset) +int block_flushpage(struct page *page, unsigned long offset) { struct buffer_head *head, *bh, *next; unsigned int curr_off = 0; diff --git a/fs/coda/dir.c b/fs/coda/dir.c index 6c90a4116b1e..794a8d263af8 100644 --- a/fs/coda/dir.c +++ b/fs/coda/dir.c @@ -80,10 +80,8 @@ struct inode_operations coda_dir_inode_operations = NULL, /* get_block */ NULL, /* readpage */ NULL, /* writepage */ - NULL, /* flushpage */ NULL, /* truncate */ coda_permission, /* permission */ - NULL, /* smap */ coda_revalidate_inode /* revalidate */ }; diff --git a/fs/coda/file.c b/fs/coda/file.c index 19754f0d6d1c..d053258e8a71 100644 --- a/fs/coda/file.c +++ b/fs/coda/file.c @@ -50,10 +50,8 @@ struct inode_operations coda_file_inode_operations = { NULL, /* get_block */ coda_readpage, /* readpage */ NULL, /* writepage */ - NULL, /* flushpage */ NULL, /* truncate */ coda_permission, /* permission */ - NULL, /* smap */ coda_revalidate_inode /* revalidate */ }; diff --git a/fs/coda/pioctl.c b/fs/coda/pioctl.c index 3e6924f15f6d..cec92b7f4c91 100644 --- a/fs/coda/pioctl.c +++ b/fs/coda/pioctl.c @@ -49,10 +49,8 @@ struct inode_operations coda_ioctl_inode_operations = NULL, /* get_block */ NULL, /* readpage */ NULL, /* writepage */ - NULL, /* flushpage */ NULL, /* truncate */ 
coda_ioctl_permission, /* permission */ - NULL, /* smap */ NULL /* revalidate */ }; diff --git a/fs/coda/symlink.c b/fs/coda/symlink.c index cfe0dbcb759f..ecdf4968d9f6 100644 --- a/fs/coda/symlink.c +++ b/fs/coda/symlink.c @@ -45,10 +45,8 @@ struct inode_operations coda_symlink_inode_operations = { NULL, /* get_block */ NULL, /* readpage */ NULL, /* writepage */ - NULL, /* flushpage */ NULL, /* truncate */ NULL, /* permission */ - NULL, /* smap */ NULL /* revalidate */ }; diff --git a/fs/devices.c b/fs/devices.c index a436f60de4da..b44acfacf7d9 100644 --- a/fs/devices.c +++ b/fs/devices.c @@ -281,10 +281,8 @@ struct inode_operations blkdev_inode_operations = { NULL, /* get_block */ NULL, /* readpage */ NULL, /* writepage */ - NULL, /* flushpage */ NULL, /* truncate */ NULL, /* permission */ - NULL, /* smap */ NULL /* revalidate */ }; @@ -338,10 +336,8 @@ struct inode_operations chrdev_inode_operations = { NULL, /* get_block */ NULL, /* readpage */ NULL, /* writepage */ - NULL, /* flushpage */ NULL, /* truncate */ NULL, /* permission */ - NULL, /* smap */ NULL /* revalidate */ }; diff --git a/fs/devpts/root.c b/fs/devpts/root.c index 5b5948e8078a..9b4d194f223f 100644 --- a/fs/devpts/root.c +++ b/fs/devpts/root.c @@ -54,10 +54,8 @@ struct inode_operations devpts_root_inode_operations = { NULL, /* get_block */ NULL, /* readpage */ NULL, /* writepage */ - NULL, /* flushpage */ NULL, /* truncate */ NULL, /* permission */ - NULL, /* smap */ NULL /* revalidate */ }; diff --git a/fs/efs/dir.c b/fs/efs/dir.c index 29f2ebbc3f33..05a3334f74a3 100644 --- a/fs/efs/dir.c +++ b/fs/efs/dir.c @@ -43,10 +43,8 @@ struct inode_operations efs_dir_inode_operations = { efs_get_block, /* get_block */ NULL, /* readpage */ NULL, /* writepage */ - NULL, /* flushpage */ NULL, /* truncate */ NULL, /* permission */ - NULL, /* smap */ NULL /* revalidate */ }; diff --git a/fs/efs/file.c b/fs/efs/file.c index b86965f8fa8e..adcc03120bff 100644 --- a/fs/efs/file.c +++ b/fs/efs/file.c @@ -94,9 +94,7 
@@ struct inode_operations efs_file_inode_operations = { efs_get_block, /* get_block */ block_read_full_page, /* readpage */ NULL, /* writepage */ - NULL, /* flushpage */ NULL, /* truncate */ NULL, /* permission */ - NULL, /* smap */ NULL /* revalidate */ }; diff --git a/fs/efs/symlink.c b/fs/efs/symlink.c index 69ddda45b164..2b3556995bb5 100644 --- a/fs/efs/symlink.c +++ b/fs/efs/symlink.c @@ -29,10 +29,8 @@ struct inode_operations efs_symlink_inode_operations = { NULL, /* get_block */ NULL, /* readpage */ NULL, /* writepage */ - NULL, /* flushpage */ NULL, /* truncate */ NULL, /* permission */ - NULL, /* smap */ NULL /* revalidate */ }; diff --git a/fs/exec.c b/fs/exec.c index d26743c1548f..e329252d19dc 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -371,6 +371,7 @@ static int exec_mmap(void) if (mm) { struct mm_struct *active_mm = current->active_mm; + init_new_context(current, mm); current->mm = mm; current->active_mm = mm; activate_mm(active_mm, mm); diff --git a/fs/ext2/dir.c b/fs/ext2/dir.c index 131702e3d57f..2cc69ee5bb3f 100644 --- a/fs/ext2/dir.c +++ b/fs/ext2/dir.c @@ -66,10 +66,8 @@ struct inode_operations ext2_dir_inode_operations = { NULL, /* get_block */ NULL, /* readpage */ NULL, /* writepage */ - NULL, /* flushpage */ NULL, /* truncate */ ext2_permission, /* permission */ - NULL, /* smap */ NULL /* revalidate */ }; diff --git a/fs/ext2/file.c b/fs/ext2/file.c index cc2d40ab3e38..5ebba9da1b4f 100644 --- a/fs/ext2/file.c +++ b/fs/ext2/file.c @@ -176,9 +176,7 @@ struct inode_operations ext2_file_inode_operations = { ext2_get_block, /* get_block */ block_read_full_page, /* readpage */ block_write_full_page, /* writepage */ - block_flushpage, /* flushpage */ ext2_truncate, /* truncate */ ext2_permission, /* permission */ - NULL, /* smap */ NULL, /* revalidate */ }; diff --git a/fs/ext2/symlink.c b/fs/ext2/symlink.c index 03633b2e4463..c830d5baa3df 100644 --- a/fs/ext2/symlink.c +++ b/fs/ext2/symlink.c @@ -42,10 +42,8 @@ struct inode_operations 
ext2_symlink_inode_operations = { NULL, /* get_block */ NULL, /* readpage */ NULL, /* writepage */ - NULL, /* flushpage */ NULL, /* truncate */ NULL, /* permission */ - NULL, /* smap */ NULL /* revalidate */ }; diff --git a/fs/fat/file.c b/fs/fat/file.c index 0f96aa20404a..06da340fb216 100644 --- a/fs/fat/file.c +++ b/fs/fat/file.c @@ -60,10 +60,8 @@ struct inode_operations fat_file_inode_operations = { fat_get_block, /* get_block */ block_read_full_page, /* readpage */ NULL, /* writepage */ - block_flushpage, /* flushpage */ fat_truncate, /* truncate */ NULL, /* permission */ - NULL, /* smap */ NULL /* revalidate */ }; diff --git a/fs/fifo.c b/fs/fifo.c index 757b2c4f444f..516e690c9cd3 100644 --- a/fs/fifo.c +++ b/fs/fifo.c @@ -169,25 +169,6 @@ static struct file_operations def_fifo_fops = { struct inode_operations fifo_inode_operations = { &def_fifo_fops, /* default file operations */ - NULL, /* create */ - NULL, /* lookup */ - NULL, /* link */ - NULL, /* unlink */ - NULL, /* symlink */ - NULL, /* mkdir */ - NULL, /* rmdir */ - NULL, /* mknod */ - NULL, /* rename */ - NULL, /* readlink */ - NULL, /* follow_link */ - NULL, /* get_block */ - NULL, /* readpage */ - NULL, /* writepage */ - NULL, /* flushpage */ - NULL, /* truncate */ - NULL, /* permission */ - NULL, /* smap */ - NULL /* revalidate */ }; diff --git a/fs/hfs/dir_cap.c b/fs/hfs/dir_cap.c index a5ce908a7622..602d85c522a7 100644 --- a/fs/hfs/dir_cap.c +++ b/fs/hfs/dir_cap.c @@ -90,10 +90,8 @@ struct inode_operations hfs_cap_ndir_inode_operations = { NULL, /* get_block */ NULL, /* readpage */ NULL, /* writepage */ - NULL, /* flushpage */ NULL, /* truncate */ NULL, /* permission */ - NULL, /* smap */ NULL /* revalidate */ }; @@ -113,10 +111,8 @@ struct inode_operations hfs_cap_fdir_inode_operations = { NULL, /* get_block */ NULL, /* readpage */ NULL, /* writepage */ - NULL, /* flushpage */ NULL, /* truncate */ NULL, /* permission */ - NULL, /* smap */ NULL /* revalidate */ }; @@ -136,10 +132,8 @@ struct 
inode_operations hfs_cap_rdir_inode_operations = { NULL, /* get_block */ NULL, /* readpage */ NULL, /* writepage */ - NULL, /* flushpage */ NULL, /* truncate */ NULL, /* permission */ - NULL, /* smap */ NULL /* revalidate */ }; diff --git a/fs/hfs/dir_dbl.c b/fs/hfs/dir_dbl.c index d68f7ed6a9c6..bc32f23c13d5 100644 --- a/fs/hfs/dir_dbl.c +++ b/fs/hfs/dir_dbl.c @@ -89,10 +89,8 @@ struct inode_operations hfs_dbl_dir_inode_operations = { NULL, /* get_block */ NULL, /* readpage */ NULL, /* writepage */ - NULL, /* flushpage */ NULL, /* truncate */ NULL, /* permission */ - NULL, /* smap */ NULL /* revalidate */ }; diff --git a/fs/hfs/dir_nat.c b/fs/hfs/dir_nat.c index aa8a90220b57..855d3434e13d 100644 --- a/fs/hfs/dir_nat.c +++ b/fs/hfs/dir_nat.c @@ -96,10 +96,8 @@ struct inode_operations hfs_nat_ndir_inode_operations = { NULL, /* get_block */ NULL, /* readpage */ NULL, /* writepage */ - NULL, /* flushpage */ NULL, /* truncate */ NULL, /* permission */ - NULL, /* smap */ NULL /* revalidate */ }; @@ -119,10 +117,8 @@ struct inode_operations hfs_nat_hdir_inode_operations = { NULL, /* get_block */ NULL, /* readpage */ NULL, /* writepage */ - NULL, /* flushpage */ NULL, /* truncate */ NULL, /* permission */ - NULL, /* smap */ NULL /* revalidate */ }; diff --git a/fs/hfs/file.c b/fs/hfs/file.c index 594d6527152a..ec1d5ada4100 100644 --- a/fs/hfs/file.c +++ b/fs/hfs/file.c @@ -66,10 +66,8 @@ struct inode_operations hfs_file_inode_operations = { hfs_bmap, /* get_block */ block_read_full_page, /* readpage */ NULL, /* writepage */ - NULL, /* flushpage */ hfs_file_truncate, /* truncate */ NULL, /* permission */ - NULL, /* smap */ NULL /* revalidate */ }; diff --git a/fs/hfs/file_cap.c b/fs/hfs/file_cap.c index e96dd7f20bee..23566021b942 100644 --- a/fs/hfs/file_cap.c +++ b/fs/hfs/file_cap.c @@ -80,10 +80,8 @@ struct inode_operations hfs_cap_info_inode_operations = { NULL, /* get_block - none */ NULL, /* readpage */ NULL, /* writepage */ - NULL, /* flushpage */ cap_info_truncate, 
/* truncate */ NULL, /* permission */ - NULL, /* smap */ NULL /* revalidata */ }; diff --git a/fs/hfs/file_hdr.c b/fs/hfs/file_hdr.c index 58a12133a57e..bb7d82e4fddf 100644 --- a/fs/hfs/file_hdr.c +++ b/fs/hfs/file_hdr.c @@ -82,10 +82,8 @@ struct inode_operations hfs_hdr_inode_operations = { header part has no disk block */ NULL, /* readpage */ NULL, /* writepage */ - NULL, /* flushpage */ hdr_truncate, /* truncate */ NULL, /* permission */ - NULL, /* smap */ NULL /* revalidate */ }; diff --git a/fs/hpfs/inode.c b/fs/hpfs/inode.c index d79e5581476c..4acf7b0e209d 100644 --- a/fs/hpfs/inode.c +++ b/fs/hpfs/inode.c @@ -44,10 +44,8 @@ static const struct inode_operations hpfs_file_iops = &hpfs_get_block, /* get_block */ block_read_full_page, /* readpage */ block_write_full_page, /* writepage */ - block_flushpage, /* flushpage */ hpfs_truncate, /* truncate */ NULL, /* permission */ - NULL, /* smap */ NULL, /* revalidate */ }; @@ -87,10 +85,8 @@ static const struct inode_operations hpfs_dir_iops = NULL, /* get_block */ NULL, /* readpage */ NULL, /* writepage */ - NULL, /* flushpage */ NULL, /* truncate */ NULL, /* permission */ - NULL, /* smap */ NULL /* revalidate */ }; @@ -111,10 +107,8 @@ const struct inode_operations hpfs_symlink_iops = NULL, /* get_block */ NULL, /* readpage */ NULL, /* writepage */ - NULL, /* flushpage */ NULL, /* truncate */ NULL, /* permission */ - NULL, /* smap */ NULL /* revalidate */ }; diff --git a/fs/isofs/dir.c b/fs/isofs/dir.c index bc64dfdd587b..fb60d3a5545b 100644 --- a/fs/isofs/dir.c +++ b/fs/isofs/dir.c @@ -60,10 +60,8 @@ struct inode_operations isofs_dir_inode_operations = NULL, /* get_block */ NULL, /* readpage */ NULL, /* writepage */ - NULL, /* flushpage */ NULL, /* truncate */ NULL, /* permission */ - NULL, /* smap */ NULL /* revalidate */ }; diff --git a/fs/isofs/file.c b/fs/isofs/file.c index fd9b124ed53c..b15b6c5dcb9b 100644 --- a/fs/isofs/file.c +++ b/fs/isofs/file.c @@ -51,9 +51,7 @@ struct inode_operations 
isofs_file_inode_operations = { isofs_get_block, /* get_block */ block_read_full_page, /* readpage */ NULL, /* writepage */ - NULL, /* flushpage */ NULL, /* truncate */ NULL, /* permission */ - NULL, /* smap */ NULL /* revalidate */ }; diff --git a/fs/isofs/symlink.c b/fs/isofs/symlink.c index e5a7a2c726d7..e4e0fb837e49 100644 --- a/fs/isofs/symlink.c +++ b/fs/isofs/symlink.c @@ -41,10 +41,8 @@ struct inode_operations isofs_symlink_inode_operations = { NULL, /* get_block */ NULL, /* readpage */ NULL, /* writepage */ - NULL, /* flushpage */ NULL, /* truncate */ NULL, /* permission */ - NULL, /* smap */ NULL /* revalidate */ }; diff --git a/fs/minix/dir.c b/fs/minix/dir.c index a44d5d69d5d5..52c0e0ceefdf 100644 --- a/fs/minix/dir.c +++ b/fs/minix/dir.c @@ -55,10 +55,8 @@ struct inode_operations minix_dir_inode_operations = { NULL, /* get_block */ NULL, /* readpage */ NULL, /* writepage */ - NULL, /* flushpage */ NULL, /* truncate */ NULL, /* permission */ - NULL, /* smap */ NULL /* revalidate */ }; diff --git a/fs/minix/file.c b/fs/minix/file.c index 6683c393cc64..4240f622ecdf 100644 --- a/fs/minix/file.c +++ b/fs/minix/file.c @@ -73,9 +73,7 @@ struct inode_operations minix_file_inode_operations = { minix_get_block, /* get_block */ block_read_full_page, /* readpage */ block_write_full_page, /* writepage */ - block_flushpage, /* flushpage */ minix_truncate, /* truncate */ NULL, /* permission */ - NULL, /* smap */ NULL, /* revalidate */ }; diff --git a/fs/minix/symlink.c b/fs/minix/symlink.c index 3a8951b096dd..2fa5a34a2aaa 100644 --- a/fs/minix/symlink.c +++ b/fs/minix/symlink.c @@ -36,10 +36,8 @@ struct inode_operations minix_symlink_inode_operations = { NULL, /* get_block */ NULL, /* readpage */ NULL, /* writepage */ - NULL, /* flushpage */ NULL, /* truncate */ NULL, /* permission */ - NULL, /* smap */ NULL /* revalidate */ }; diff --git a/fs/msdos/namei.c b/fs/msdos/namei.c index d69aba48d2e4..ea0b0561620d 100644 --- a/fs/msdos/namei.c +++ b/fs/msdos/namei.c @@ 
-600,10 +600,8 @@ struct inode_operations msdos_dir_inode_operations = { NULL, /* get_block */ NULL, /* readpage */ NULL, /* writepage */ - NULL, /* flushpage */ NULL, /* truncate */ NULL, /* permission */ - NULL, /* smap */ NULL, /* revalidate */ }; diff --git a/fs/ncpfs/dir.c b/fs/ncpfs/dir.c index 30d4588d1c68..96f6dc8a16cc 100644 --- a/fs/ncpfs/dir.c +++ b/fs/ncpfs/dir.c @@ -81,10 +81,8 @@ struct inode_operations ncp_dir_inode_operations = NULL, /* get_block */ NULL, /* readpage */ NULL, /* writepage */ - NULL, /* flushpage */ NULL, /* truncate */ NULL, /* permission */ - NULL, /* smap */ NULL, /* revalidate */ }; diff --git a/fs/ncpfs/file.c b/fs/ncpfs/file.c index 4a907a9b1236..98fd9f23e067 100644 --- a/fs/ncpfs/file.c +++ b/fs/ncpfs/file.c @@ -294,9 +294,7 @@ struct inode_operations ncp_file_inode_operations = NULL, /* get_block */ NULL, /* readpage */ NULL, /* writepage */ - NULL, /* flushpage */ NULL, /* truncate */ NULL, /* permission */ - NULL, /* smap */ NULL /* revalidate */ }; diff --git a/fs/ncpfs/symlink.c b/fs/ncpfs/symlink.c index 81151959a85e..5d6954df9b0a 100644 --- a/fs/ncpfs/symlink.c +++ b/fs/ncpfs/symlink.c @@ -62,10 +62,8 @@ struct inode_operations ncp_symlink_inode_operations={ NULL, /* get_block */ NULL, /* readpage */ NULL, /* writepage */ - NULL, /* flushpage */ NULL, /* truncate */ NULL, /* permission */ - NULL, /* smap */ NULL /* revalidate */ }; diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c index 25a85744fbd4..7bb207067b13 100644 --- a/fs/nfs/dir.c +++ b/fs/nfs/dir.c @@ -81,10 +81,8 @@ struct inode_operations nfs_dir_inode_operations = { NULL, /* get_block */ NULL, /* readpage */ NULL, /* writepage */ - NULL, /* flushpage */ NULL, /* truncate */ NULL, /* permission */ - NULL, /* smap */ nfs_revalidate, /* revalidate */ }; @@ -302,51 +300,23 @@ out_error: * page-in of the RPC reply, nowhere else, this simplies * things substantially. 
*/ -static struct page *try_to_get_dirent_page(struct file *file, __u32 cookie, int refetch_ok) + +static int nfs_dir_filler(struct dentry *dentry, struct page *page) { struct nfs_readdirargs rd_args; struct nfs_readdirres rd_res; - struct dentry *dentry = file->f_dentry; struct inode *inode = dentry->d_inode; - struct page *page, **hash, *page_cache; - long offset; + long offset = page->index; __u32 *cookiep; + int err; - page = NULL; - page_cache = page_cache_alloc(); - if (!page_cache) - goto out; - - if ((offset = nfs_readdir_offset(inode, cookie)) < 0) { - if (!refetch_ok || - (offset = refetch_to_readdir_cookie(file, inode)) < 0) { - page_cache_free(page_cache); - goto out; - } - } + kmap(page); + err = -EIO; cookiep = find_cookie(inode, offset); - if (!cookiep) { - /* Gross fatal error. */ - page_cache_free(page_cache); - goto out; - } - - hash = page_hash(&inode->i_data, offset); -repeat: - page = __find_lock_page(&inode->i_data, offset, hash); - if (page) { - page_cache_free(page_cache); - goto unlock_out; - } + if (!cookiep) + goto fail; - page = page_cache; - if (add_to_page_cache_unique(page, &inode->i_data, offset, hash)) { - page_cache_release(page); - goto repeat; - } - - kmap(page); rd_args.fh = NFS_FH(dentry); rd_res.buffer = (char *)page_address(page); rd_res.bufsiz = PAGE_CACHE_SIZE; @@ -355,27 +325,55 @@ repeat: rd_args.buffer = rd_res.buffer; rd_args.bufsiz = rd_res.bufsiz; rd_args.cookie = rd_res.cookie; - if (rpc_call(NFS_CLIENT(inode), - NFSPROC_READDIR, &rd_args, &rd_res, 0) < 0) - goto error; + err = rpc_call(NFS_CLIENT(inode), + NFSPROC_READDIR, &rd_args, &rd_res, 0); + if (err < 0) + goto fail; } while(rd_res.bufsiz > 0); + err = -EIO; if (rd_res.bufsiz < 0) NFS_DIREOF(inode) = rd_res.cookie; else if (create_cookie(rd_res.cookie, offset, inode)) - goto error; + goto fail; SetPageUptodate(page); -unmap_out: kunmap(page); -unlock_out: UnlockPage(page); -out: + return 0; +fail: + SetPageError(page); + kunmap(page); + UnlockPage(page); + 
return err; +} + +static struct page *try_to_get_dirent_page(struct file *file, __u32 cookie, int refetch_ok) +{ + struct dentry *dentry = file->f_dentry; + struct inode *inode = dentry->d_inode; + struct page *page; + long offset; + + if ((offset = nfs_readdir_offset(inode, cookie)) < 0) { + if (!refetch_ok || + (offset = refetch_to_readdir_cookie(file, inode)) < 0) { + goto fail; + } + } + + page = read_cache_page(&inode->i_data, offset, + (filler_t *)nfs_dir_filler, dentry); + if (IS_ERR(page)) + goto fail; + if (!Page_Uptodate(page)) + goto fail2; return page; -error: - SetPageError(page); - goto unmap_out; +fail2: + page_cache_release(page); +fail: + return NULL; } /* Seek up to dirent assosciated with the passed in cookie, diff --git a/fs/nfs/file.c b/fs/nfs/file.c index 2028270fa2e7..00279fc6a4b9 100644 --- a/fs/nfs/file.c +++ b/fs/nfs/file.c @@ -74,10 +74,8 @@ struct inode_operations nfs_file_inode_operations = { NULL, /* get_block */ nfs_readpage, /* readpage */ nfs_writepage, /* writepage */ - NULL, /* flushpage */ NULL, /* truncate */ NULL, /* permission */ - NULL, /* smap */ nfs_revalidate, /* revalidate */ }; diff --git a/fs/nfs/symlink.c b/fs/nfs/symlink.c index 7b522f58bb2d..1dd02bb78a00 100644 --- a/fs/nfs/symlink.c +++ b/fs/nfs/symlink.c @@ -46,126 +46,101 @@ struct inode_operations nfs_symlink_inode_operations = { NULL, /* get_block */ NULL, /* readpage */ NULL, /* writepage */ - NULL, /* flushpage */ NULL, /* truncate */ NULL, /* permission */ - NULL, /* smap */ NULL /* revalidate */ }; /* Symlink caching in the page cache is even more simplistic * and straight-forward than readdir caching. 
*/ -static struct page *try_to_get_symlink_page(struct dentry *dentry, struct inode *inode) +static int nfs_symlink_filler(struct dentry *dentry, struct page *page) { struct nfs_readlinkargs rl_args; - struct page *page, **hash, *page_cache; - - page = NULL; - page_cache = page_cache_alloc(); - if (!page_cache) - goto out; - - hash = page_hash(&inode->i_data, 0); -repeat: - page = __find_lock_page(&inode->i_data, 0, hash); - if (page) { - page_cache_free(page_cache); - goto unlock_out; - } - - page = page_cache; - if (add_to_page_cache_unique(page, &inode->i_data, 0, hash)) { - page_cache_release(page); - goto repeat; - } - kmap(page); - /* We place the length at the beginning of the page, * in host byte order, followed by the string. The * XDR response verification will NULL terminate it. */ rl_args.fh = NFS_FH(dentry); - rl_args.buffer = (const void *)page_address(page_cache); - if (rpc_call(NFS_CLIENT(inode), NFSPROC_READLINK, + rl_args.buffer = (const void *)page_address(page); + if (rpc_call(NFS_CLIENT(dentry->d_inode), NFSPROC_READLINK, &rl_args, NULL, 0) < 0) goto error; SetPageUptodate(page); -unlock_out: kunmap(page); UnlockPage(page); -out: - return page; + return 0; error: SetPageError(page); - goto unlock_out; + kunmap(page); + UnlockPage(page); + return -EIO; } -static int nfs_readlink(struct dentry *dentry, char *buffer, int buflen) +static char *nfs_getlink(struct dentry *dentry, struct page **ppage) { struct inode *inode = dentry->d_inode; struct page *page; - u32 *p, len; + u32 *p; /* Caller revalidated the directory inode already. 
*/ - page = find_get_page(&inode->i_data, 0); - if (!page) - goto no_readlink_page; + page = read_cache_page(&inode->i_data, 0, + (filler_t *)nfs_symlink_filler, dentry); + if (IS_ERR(page)) + goto read_failed; if (!Page_Uptodate(page)) - goto readlink_read_error; -success: + goto followlink_read_error; + *ppage = page; p = (u32 *) kmap(page); - len = *p++; + return (char*)(p+1); + +followlink_read_error: + page_cache_release(page); + return ERR_PTR(-EIO); +read_failed: + return (char*)page; +} + +static int nfs_readlink(struct dentry *dentry, char *buffer, int buflen) +{ + struct page *page = NULL; + u32 len; + char *s = nfs_getlink(dentry, &page); + UPDATE_ATIME(dentry->d_inode); + + len = PTR_ERR(s); + if (IS_ERR(s)) + goto out; + + len = strlen(s); if (len > buflen) len = buflen; - copy_to_user(buffer, p, len); + copy_to_user(buffer, s, len); kunmap(page); page_cache_release(page); +out: return len; - -no_readlink_page: - page = try_to_get_symlink_page(dentry, inode); - if (!page) - goto no_page; - if (Page_Uptodate(page)) - goto success; -readlink_read_error: - page_cache_release(page); -no_page: - return -EIO; } static struct dentry * nfs_follow_link(struct dentry *dentry, struct dentry *base, unsigned int follow) { struct dentry *result; - struct inode *inode = dentry->d_inode; - struct page *page; - u32 *p; + struct page *page = NULL; + char *s = nfs_getlink(dentry, &page); + UPDATE_ATIME(dentry->d_inode); + + if (IS_ERR(s)) + goto fail; + + result = lookup_dentry(s, base, follow); - /* Caller revalidated the directory inode already. 
*/ - page = find_get_page(&inode->i_data, 0); - if (!page) - goto no_followlink_page; - if (!Page_Uptodate(page)) - goto followlink_read_error; -success: - p = (u32 *) kmap(page); - result = lookup_dentry((char *) (p + 1), base, follow); kunmap(page); page_cache_release(page); return result; -no_followlink_page: - page = try_to_get_symlink_page(dentry, inode); - if (!page) - goto no_page; - if (Page_Uptodate(page)) - goto success; -followlink_read_error: - page_cache_release(page); -no_page: - return ERR_PTR(-EIO); +fail: + return (struct dentry *)s; } diff --git a/fs/ntfs/fs.c b/fs/ntfs/fs.c index 1dfdbe387ad8..95dfd7f24fb8 100644 --- a/fs/ntfs/fs.c +++ b/fs/ntfs/fs.c @@ -442,10 +442,8 @@ static struct inode_operations ntfs_inode_operations_nobmap = { NULL, /* get_block */ NULL, /* readpage */ NULL, /* writepage */ - NULL, /* flushpage */ NULL, /* truncate */ NULL, /* permission */ - NULL, /* smap */ NULL, /* revalidate */ }; @@ -625,10 +623,8 @@ static struct inode_operations ntfs_inode_operations = { ntfs_bmap, /* get_block */ block_read_full_page, /* readpage */ NULL, /* writepage */ - NULL, /* flushpage */ NULL, /* truncate */ NULL, /* permission */ - NULL, /* smap */ NULL, /* revalidate */ }; @@ -674,10 +670,8 @@ static struct inode_operations ntfs_dir_inode_operations = { NULL, /* get_block */ NULL, /* readpage */ NULL, /* writepage */ - NULL, /* flushpage */ NULL, /* truncate */ NULL, /* permission */ - NULL, /* smap */ NULL, /* revalidate */ }; diff --git a/fs/open.c b/fs/open.c index 8c8d5698d755..7a533fb927c0 100644 --- a/fs/open.c +++ b/fs/open.c @@ -55,14 +55,14 @@ out: return error; } -int do_truncate(struct dentry *dentry, unsigned long length) +int do_truncate(struct dentry *dentry, loff_t length) { struct inode *inode = dentry->d_inode; int error; struct iattr newattrs; - /* Not pretty: "inode->i_size" shouldn't really be "off_t". But it is. */ - if ((off_t) length < 0) + /* Not pretty: "inode->i_size" shouldn't really be signed. But it is. 
*/ + if (length < 0) return -EINVAL; down(&inode->i_sem); diff --git a/fs/pipe.c b/fs/pipe.c index e8f4a1ef0e0f..68922b9b80bb 100644 --- a/fs/pipe.c +++ b/fs/pipe.c @@ -608,25 +608,6 @@ fail_inode: struct inode_operations pipe_inode_operations = { &rdwr_pipe_fops, - NULL, /* create */ - NULL, /* lookup */ - NULL, /* link */ - NULL, /* unlink */ - NULL, /* symlink */ - NULL, /* mkdir */ - NULL, /* rmdir */ - NULL, /* mknod */ - NULL, /* rename */ - NULL, /* readlink */ - NULL, /* follow_link */ - NULL, /* get_block */ - NULL, /* readpage */ - NULL, /* writepage */ - NULL, /* flushpage */ - NULL, /* truncate */ - NULL, /* permission */ - NULL, /* smap */ - NULL /* revalidate */ }; int do_pipe(int *fd) diff --git a/fs/proc/base.c b/fs/proc/base.c index af68aa4511df..eac10cbd434d 100644 --- a/fs/proc/base.c +++ b/fs/proc/base.c @@ -369,10 +369,8 @@ static struct inode_operations proc_mem_inode_operations = { NULL, /* get_block */ NULL, /* readpage */ NULL, /* writepage */ - NULL, /* flushpage */ NULL, /* truncate */ proc_permission, /* permission */ - NULL, /* smap */ NULL /* revalidate */ }; @@ -794,7 +792,6 @@ static struct inode_operations proc_fd_inode_operations = { NULL, /* get_block */ NULL, /* readpage */ NULL, /* writepage */ - NULL, /* flushpage */ NULL, /* truncate */ proc_permission, /* permission */ }; diff --git a/fs/proc/kmsg.c b/fs/proc/kmsg.c index bfe6c8c2e631..b9344ba55e89 100644 --- a/fs/proc/kmsg.c +++ b/fs/proc/kmsg.c @@ -61,23 +61,4 @@ static struct file_operations proc_kmsg_operations = { struct inode_operations proc_kmsg_inode_operations = { &proc_kmsg_operations, /* default base directory file-ops */ - NULL, /* create */ - NULL, /* lookup */ - NULL, /* link */ - NULL, /* unlink */ - NULL, /* symlink */ - NULL, /* mkdir */ - NULL, /* rmdir */ - NULL, /* mknod */ - NULL, /* rename */ - NULL, /* readlink */ - NULL, /* follow_link */ - NULL, /* get_block */ - NULL, /* readpage */ - NULL, /* writepage */ - NULL, /* flushpage */ - NULL, /* truncate 
*/ - NULL, /* permission */ - NULL, /* smap */ - NULL /* revalidate */ }; diff --git a/fs/proc/omirr.c b/fs/proc/omirr.c index f738827a7fb4..f205dd7538e8 100644 --- a/fs/proc/omirr.c +++ b/fs/proc/omirr.c @@ -278,23 +278,4 @@ static struct file_operations omirr_operations = { struct inode_operations proc_omirr_inode_operations = { &omirr_operations, - NULL, /* create */ - NULL, /* lookup */ - NULL, /* link */ - NULL, /* unlink */ - NULL, /* symlink */ - NULL, /* mkdir */ - NULL, /* rmdir */ - NULL, /* mknod */ - NULL, /* rename */ - NULL, /* readlink */ - NULL, /* follow_link */ - NULL, /* get_block */ - NULL, /* readpage */ - NULL, /* writepage */ - NULL, /* flushpage */ - NULL, /* truncate */ - NULL, /* permission */ - NULL, /* smap */ - NULL /* revalidate */ }; diff --git a/fs/proc/openpromfs.c b/fs/proc/openpromfs.c index 5dc1e9dc47e7..55f290664095 100644 --- a/fs/proc/openpromfs.c +++ b/fs/proc/openpromfs.c @@ -565,25 +565,6 @@ static struct file_operations openpromfs_prop_ops = { static struct inode_operations openpromfs_prop_inode_ops = { &openpromfs_prop_ops, /* default property file-ops */ - NULL, /* create */ - NULL, /* lookup */ - NULL, /* link */ - NULL, /* unlink */ - NULL, /* symlink */ - NULL, /* mkdir */ - NULL, /* rmdir */ - NULL, /* mknod */ - NULL, /* rename */ - NULL, /* readlink */ - NULL, /* follow_link */ - NULL, /* get_block */ - NULL, /* readpage */ - NULL, /* writepage */ - NULL, /* flushpage */ - NULL, /* truncate */ - NULL, /* permission */ - NULL, /* smap */ - NULL /* revalidate */ }; static struct file_operations openpromfs_nodenum_ops = { @@ -602,25 +583,6 @@ static struct file_operations openpromfs_nodenum_ops = { static struct inode_operations openpromfs_nodenum_inode_ops = { &openpromfs_nodenum_ops,/* default .node file-ops */ - NULL, /* create */ - NULL, /* lookup */ - NULL, /* link */ - NULL, /* unlink */ - NULL, /* symlink */ - NULL, /* mkdir */ - NULL, /* rmdir */ - NULL, /* mknod */ - NULL, /* rename */ - NULL, /* readlink */ 
- NULL, /* follow_link */ - NULL, /* get_block */ - NULL, /* readpage */ - NULL, /* writepage */ - NULL, /* flushpage */ - NULL, /* truncate */ - NULL, /* permission */ - NULL, /* smap */ - NULL /* revalidate */ }; static struct file_operations openprom_alias_operations = { @@ -653,10 +615,8 @@ static struct inode_operations openprom_alias_inode_operations = { NULL, /* get_block */ NULL, /* readpage */ NULL, /* writepage */ - NULL, /* flushpage */ NULL, /* truncate */ NULL, /* permission */ - NULL, /* smap */ NULL /* revalidate */ }; diff --git a/fs/proc/proc_devtree.c b/fs/proc/proc_devtree.c index 9ccd81581bec..1b4b7f23bd57 100644 --- a/fs/proc/proc_devtree.c +++ b/fs/proc/proc_devtree.c @@ -41,62 +41,6 @@ static int property_read_proc(char *page, char **start, off_t off, * and "@10" to it. */ -static int devtree_readlink(struct dentry *, char *, int); -static struct dentry *devtree_follow_link(struct dentry *, struct dentry *, unsigned int); - -struct inode_operations devtree_symlink_inode_operations = { - NULL, /* no file-operations */ - NULL, /* create */ - NULL, /* lookup */ - NULL, /* link */ - NULL, /* unlink */ - NULL, /* symlink */ - NULL, /* mkdir */ - NULL, /* rmdir */ - NULL, /* mknod */ - NULL, /* rename */ - devtree_readlink, /* readlink */ - devtree_follow_link, /* follow_link */ - NULL, /* get_block */ - NULL, /* readpage */ - NULL, /* writepage */ - NULL, /* flushpage */ - NULL, /* truncate */ - NULL, /* permission */ - NULL, /* smap */ - NULL /* revalidate */ -}; - -static struct dentry *devtree_follow_link(struct dentry *dentry, - struct dentry *base, - unsigned int follow) -{ - struct inode *inode = dentry->d_inode; - struct proc_dir_entry * de; - char *link; - - de = (struct proc_dir_entry *) inode->u.generic_ip; - link = (char *) de->data; - return lookup_dentry(link, base, follow); -} - -static int devtree_readlink(struct dentry *dentry, char *buffer, int buflen) -{ - struct inode *inode = dentry->d_inode; - struct proc_dir_entry * de; - 
char *link; - int linklen; - - de = (struct proc_dir_entry *) inode->u.generic_ip; - link = (char *) de->data; - linklen = strlen(link); - if (linklen > buflen) - linklen = buflen; - if (copy_to_user(buffer, link, linklen)) - return -EFAULT; - return linklen; -} - /* * Process a node, adding entries for its children and its properties. */ @@ -115,18 +59,11 @@ static void add_node(struct device_node *np, struct proc_dir_entry *de) * Unfortunately proc_register puts each new entry * at the beginning of the list. So we rearrange them. */ - ent = kmalloc(sizeof(struct proc_dir_entry), GFP_KERNEL); + ent = create_proc_read_entry(de, 0, pp->name, + property_read_proc, pp); if (ent == 0) break; - memset(ent, 0, sizeof(struct proc_dir_entry)); - ent->name = pp->name; - ent->namelen = strlen(pp->name); - ent->mode = S_IFREG | S_IRUGO; - ent->nlink = 1; - ent->data = pp; - ent->read_proc = property_read_proc; ent->size = pp->length; - proc_register(de, ent); *lastp = ent; lastp = &ent->next; } @@ -140,15 +77,9 @@ static void add_node(struct device_node *np, struct proc_dir_entry *de) l = strlen(p); if (l > 2 && p[l-2] == '@' && p[l-1] == '0') l -= 2; - ent = kmalloc(sizeof(struct proc_dir_entry), GFP_KERNEL); + ent = proc_mkdir(de, p); if (ent == 0) break; - memset(ent, 0, sizeof(struct proc_dir_entry)); - ent->name = p; - ent->namelen = l; - ent->mode = S_IFDIR | S_IRUGO | S_IXUGO; - ent->nlink = 2; - proc_register(de, ent); *lastp = ent; lastp = &ent->next; add_node(child, ent); @@ -168,18 +99,9 @@ static void add_node(struct device_node *np, struct proc_dir_entry *de) if (sib->name && strcmp(sib->name, child->name) == 0) break; if (sib == child && strncmp(p, child->name, l) != 0) { - al = kmalloc(sizeof(struct proc_dir_entry), - GFP_KERNEL); + al = proc_symlink(de, child->name, ent->name); if (al == 0) break; - memset(al, 0, sizeof(struct proc_dir_entry)); - al->name = child->name; - al->namelen = strlen(child->name); - al->mode = S_IFLNK | S_IRUGO | S_IXUGO; - al->nlink 
= 1; - al->data = (void *) ent->name; - al->ops = &devtree_symlink_inode_operations; - proc_register(de, al); *lastp = al; lastp = &al->next; } @@ -187,16 +109,8 @@ static void add_node(struct device_node *np, struct proc_dir_entry *de) /* * Add another directory with the @address part as its name. */ - al = kmalloc(sizeof(struct proc_dir_entry), GFP_KERNEL); + al = proc_symlink(de, at, ent->name); if (al == 0) break; - memset(al, 0, sizeof(struct proc_dir_entry)); - al->name = at; - al->namelen = strlen(at); - al->mode = S_IFLNK | S_IRUGO | S_IXUGO; - al->nlink = 1; - al->data = (void *) ent->name; - al->ops = &devtree_symlink_inode_operations; - proc_register(de, al); *lastp = al; lastp = &al->next; diff --git a/fs/qnx4/dir.c b/fs/qnx4/dir.c index 0fbae7262f8c..810c3accd860 100644 --- a/fs/qnx4/dir.c +++ b/fs/qnx4/dir.c @@ -122,9 +122,7 @@ struct inode_operations qnx4_dir_inode_operations = NULL, /* get_block */ NULL, /* readpage */ NULL, /* writepage */ - NULL, /* flushpage */ NULL, /* truncate */ NULL, /* permission */ - NULL, /* smap */ NULL /* revalidate */ }; diff --git a/fs/qnx4/file.c b/fs/qnx4/file.c index 9eb9be4e3bdc..49ea90921a41 100644 --- a/fs/qnx4/file.c +++ b/fs/qnx4/file.c @@ -192,14 +192,12 @@ struct inode_operations qnx4_file_inode_operations = qnx4_bmap, /* get_block */ qnx4_readpage, /* readpage */ NULL, /* writepage */ - NULL, /* flushpage */ #ifdef CONFIG_QNX4FS_RW qnx4_truncate, /* truncate */ #else NULL, #endif NULL, /* permission */ - NULL, /* smap */ NULL /* revalidate */ }; @@ -221,7 +219,6 @@ static int qnx4_readpage(struct dentry *dentry, struct page *page) return -EIO; } atomic_inc(&page->count); - set_bit(PG_locked, &page->flags); buf = page_address(page); clear_bit(PG_uptodate, &page->flags); clear_bit(PG_error, &page->flags); @@ -252,8 +249,7 @@ static int qnx4_readpage(struct dentry *dentry, struct page *page) } else { set_bit(PG_uptodate, &page->flags); } - clear_bit(PG_locked, &page->flags); - wake_up(&page->wait); +
UnlockPage(page); /* free_page(buf); */ return res; diff --git a/fs/qnx4/symlinks.c b/fs/qnx4/symlinks.c index 0b3f9ae03d6c..ed7cdfca6083 100644 --- a/fs/qnx4/symlinks.c +++ b/fs/qnx4/symlinks.c @@ -46,10 +46,8 @@ struct inode_operations qnx4_symlink_inode_operations = NULL, /* get_block */ NULL, /* readpage */ NULL, /* writepage */ - NULL, /* flushpage */ NULL, /* truncate */ NULL, /* permission */ - NULL, /* smap */ NULL /* revalidate */ }; diff --git a/fs/romfs/inode.c b/fs/romfs/inode.c index 4922f6547bd7..6f22d104704a 100644 --- a/fs/romfs/inode.c +++ b/fs/romfs/inode.c @@ -536,10 +536,8 @@ static struct inode_operations romfs_file_inode_operations = { NULL, /* get_block -- not really */ romfs_readpage, /* readpage */ NULL, /* writepage */ - NULL, /* flushpage */ NULL, /* truncate */ NULL, /* permission */ - NULL, /* smap */ NULL /* revalidate */ }; @@ -580,10 +578,8 @@ static struct inode_operations romfs_dir_inode_operations = { NULL, /* get_block */ NULL, /* readpage */ NULL, /* writepage */ - NULL, /* flushpage */ NULL, /* truncate */ NULL, /* permission */ - NULL, /* smap */ NULL /* revalidate */ }; @@ -603,10 +599,8 @@ static struct inode_operations romfs_link_inode_operations = { NULL, /* get_block */ NULL, /* readpage */ NULL, /* writepage */ - NULL, /* flushpage */ NULL, /* truncate */ NULL, /* permission */ - NULL, /* smap */ NULL /* revalidate */ }; diff --git a/fs/smbfs/cache.c b/fs/smbfs/cache.c index 17251b80b847..46354e5a58a6 100644 --- a/fs/smbfs/cache.c +++ b/fs/smbfs/cache.c @@ -38,102 +38,54 @@ printk_name(const char *name, int len) } #endif -/* - * Get a page for this inode, if new is set then we want to allocate - * the page if it isn't in memory. As I understand it the rest of the - * smb-cache code assumes we return a locked page.
- */ -static unsigned long -get_cached_page(struct address_space *mapping, unsigned long offset, int new) -{ - struct page * page; - struct page ** hash; - struct page *cached_page = NULL; - - again: - hash = page_hash(mapping, offset); - page = __find_lock_page(mapping, offset, hash); - if(!page && new) { - /* not in cache, alloc a new page if we didn't do it yet */ - if (!cached_page) { - cached_page = page_cache_alloc(); - if (!cached_page) - return 0; - /* smb code assumes pages are zeroed */ - clear_page(page_address(cached_page)); - goto again; - } - page = cached_page; - if (page->buffers) - BUG(); - printk(KERN_DEBUG "smbfs: get_cached_page\n"); - if (add_to_page_cache_unique(page, mapping, offset, hash)) - /* Hmm, a page has materialized in the - cache. Fine. Go back and get that page - instead... */ - goto again; - cached_page = NULL; - } - printk(KERN_DEBUG "smbfs: get_cached_page done\n"); - if (cached_page) - page_cache_free(cached_page); - if(!page) - return 0; - if(!PageLocked(page)) - BUG(); - return page_address(page); -} - static inline struct address_space * get_cache_inode(struct cache_head *cachep) { - return (mem_map + MAP_NR((unsigned long) cachep))->mapping; + return page_cache_entry((unsigned long) cachep)->mapping; } /* - * Get a pointer to the cache_head structure, - * mapped as the page at offset 0. The page is - * kept locked while we're using the cache. + * Try to reassemble the old dircache. If we fail - set ->valid to 0. + * In any case, get at least the page at offset 0 (with ->valid==0 if + * the old one didn't make it, indeed). 
*/ struct cache_head * smb_get_dircache(struct dentry * dentry) { struct address_space * mapping = &dentry->d_inode->i_data; - struct cache_head * cachep; + struct cache_head * cachep = NULL; + struct page *page; -#ifdef SMBFS_DEBUG_VERBOSE - printk("smb_get_dircache: finding cache for %s/%s\n", - dentry->d_parent->d_name.name, dentry->d_name.name); -#endif - cachep = (struct cache_head *) get_cached_page(mapping, 0, 1); - if (!cachep) + page = find_lock_page(mapping, 0); + if (!page) { + /* Sorry, not even page 0 around */ + page = grab_cache_page(mapping, 0); + if (!page) + goto out; + cachep = (struct cache_head *)kmap(page); + memset((char*)cachep, 0, PAGE_SIZE); goto out; - if (cachep->valid) - { + } + cachep = (struct cache_head *)kmap(page); + if (cachep->valid) { + /* + * OK, at least the page 0 survived and seems to be promising. + * Let's try to reassemble the rest. + */ struct cache_index * index = cachep->index; - struct cache_block * block; unsigned long offset; int i; - cachep->valid = 0; - /* - * Here we only want to find existing cache blocks, - * not add new ones. 
- */ - for (i = 0; i < cachep->pages; i++, index++) { -#ifdef SMBFS_PARANOIA -if (index->block) -printk("smb_get_dircache: cache %s/%s has existing block!\n", -dentry->d_parent->d_name.name, dentry->d_name.name); -#endif - offset = PAGE_SIZE + (i << PAGE_SHIFT); - block = (struct cache_block *) get_cached_page(mapping, - offset, 0); - if (!block) + for (offset = 0, i = 0; i < cachep->pages; i++, index++) { + offset += PAGE_SIZE; + page = find_lock_page(mapping,offset>>PAGE_CACHE_SHIFT); + if (!page) { + /* Alas, poor Yorick */ + cachep->valid = 0; goto out; - index->block = block; + } + index->block = (struct cache_block *) kmap(page); } - cachep->valid = 1; } out: return cachep; @@ -146,18 +98,20 @@ static void smb_free_cache_blocks(struct cache_head * cachep) { struct cache_index * index = cachep->index; + struct page * page; int i; #ifdef SMBFS_DEBUG_VERBOSE printk("smb_free_cache_blocks: freeing %d blocks\n", cachep->pages); #endif - for (i = 0; i < cachep->pages; i++, index++) - { - if (index->block) - { - put_cached_page((unsigned long) index->block); - index->block = NULL; - } + for (i = 0; i < cachep->pages; i++, index++) { + if (!index->block) + continue; + page = page_cache_entry((unsigned long) index->block); + index->block = NULL; + kunmap(page); + UnlockPage(page); + page_cache_release(page); } } @@ -167,11 +121,15 @@ printk("smb_free_cache_blocks: freeing %d blocks\n", cachep->pages); void smb_free_dircache(struct cache_head * cachep) { + struct page *page; #ifdef SMBFS_DEBUG_VERBOSE printk("smb_free_dircache: freeing cache\n"); #endif smb_free_cache_blocks(cachep); - put_cached_page((unsigned long) cachep); + page = page_cache_entry((unsigned long) cachep); + kunmap(page); + UnlockPage(page); + page_cache_release(page); } /* @@ -199,6 +157,7 @@ smb_add_to_cache(struct cache_head * cachep, struct cache_dirent *entry, struct address_space * mapping = get_cache_inode(cachep); struct cache_index * index; struct cache_block * block; + struct page *page; 
unsigned long page_off; unsigned int nent, offset, len = entry->len; unsigned int needed = len + sizeof(struct cache_entry); @@ -220,8 +179,7 @@ printk(" at %ld\n", fpos); goto get_block; /* space available? */ - if (needed < index->space) - { + if (needed < index->space) { add_entry: nent = index->num_entries; index->num_entries++; @@ -249,26 +207,17 @@ len, fpos, cachep->entries); if (cachep->idx > NINDEX) /* not likely */ goto out_full; index++; -#ifdef SMBFS_PARANOIA -if (index->block) -printk("smb_add_to_cache: new index already has block!\n"); -#endif - /* - * Get the next cache block + * Get the next cache block. We don't care for its contents. */ get_block: cachep->pages++; page_off = PAGE_SIZE + (cachep->idx << PAGE_SHIFT); - block = (struct cache_block *) get_cached_page(mapping, page_off, 1); - if (block) - { + page = grab_cache_page(mapping, page_off>>PAGE_CACHE_SHIFT); + if (page) { + block = (struct cache_block *)kmap(page); index->block = block; index->space = PAGE_SIZE; -#ifdef SMBFS_DEBUG_VERBOSE -printk("smb_add_to_cache: mapping=%p, pages=%d, block at %ld\n", -mapping, cachep->pages, page_off); -#endif goto add_entry; } /* diff --git a/fs/smbfs/dir.c b/fs/smbfs/dir.c index cbaa646b397d..37bf168f6a93 100644 --- a/fs/smbfs/dir.c +++ b/fs/smbfs/dir.c @@ -62,10 +62,8 @@ struct inode_operations smb_dir_inode_operations = NULL, /* get_block */ NULL, /* readpage */ NULL, /* writepage */ - NULL, /* flushpage */ NULL, /* truncate */ NULL, /* permission */ - NULL, /* smap */ smb_revalidate_inode, /* revalidate */ }; diff --git a/fs/smbfs/file.c b/fs/smbfs/file.c index aa829b56650a..189c853f9c1d 100644 --- a/fs/smbfs/file.c +++ b/fs/smbfs/file.c @@ -401,9 +401,7 @@ struct inode_operations smb_file_inode_operations = NULL, /* get_block */ smb_readpage, /* readpage */ smb_writepage, /* writepage */ - NULL, /* flushpage */ NULL, /* truncate */ smb_file_permission, /* permission */ - NULL, /* smap */ smb_revalidate_inode, /* revalidate */ }; diff --git 
a/fs/sysv/dir.c b/fs/sysv/dir.c index d76f1d6b3729..f17fb8b637a4 100644 --- a/fs/sysv/dir.c +++ b/fs/sysv/dir.c @@ -62,10 +62,8 @@ struct inode_operations sysv_dir_inode_operations = { NULL, /* get_block */ NULL, /* readpage */ NULL, /* writepage */ - NULL, /* flushpage */ NULL, /* truncate */ NULL, /* permission */ - NULL, /* smap */ NULL /* revalidate */ }; diff --git a/fs/sysv/file.c b/fs/sysv/file.c index 36650f14171d..f54b8d6bc2be 100644 --- a/fs/sysv/file.c +++ b/fs/sysv/file.c @@ -77,9 +77,7 @@ struct inode_operations sysv_file_inode_operations = { sysv_get_block, /* get_block */ block_read_full_page, /* readpage */ block_write_full_page, /* writepage */ - block_flushpage, /* flushpage */ sysv_truncate, /* truncate */ NULL, /* permission */ - NULL, /* smap */ NULL /* revalidate */ }; diff --git a/fs/sysv/symlink.c b/fs/sysv/symlink.c index df611d589944..b84e4504e8c6 100644 --- a/fs/sysv/symlink.c +++ b/fs/sysv/symlink.c @@ -42,10 +42,8 @@ struct inode_operations sysv_symlink_inode_operations = { NULL, /* get_block */ NULL, /* readpage */ NULL, /* writepage */ - NULL, /* flushpage */ NULL, /* truncate */ NULL, /* permission */ - NULL, /* smap */ NULL /* revalidate */ }; diff --git a/fs/udf/dir.c b/fs/udf/dir.c index 16cfb9a0aaf6..84218eb14cd1 100644 --- a/fs/udf/dir.c +++ b/fs/udf/dir.c @@ -97,10 +97,8 @@ struct inode_operations udf_dir_inode_operations = { NULL, /* get_block */ NULL, /* readpage */ NULL, /* writepage */ - NULL, /* flushpage */ NULL, /* truncate */ NULL, /* permission */ - NULL, /* smap */ NULL /* revalidate */ }; diff --git a/fs/udf/file.c b/fs/udf/file.c index 83611fe67f72..2e17d81d499f 100644 --- a/fs/udf/file.c +++ b/fs/udf/file.c @@ -89,14 +89,12 @@ struct inode_operations udf_file_inode_operations = { udf_get_block, /* get_block */ block_read_full_page, /* readpage */ block_write_full_page, /* writepage */ - block_flushpage, /* flushpage */ #ifdef CONFIG_UDF_RW udf_truncate, /* truncate */ #else NULL, /* truncate */ #endif NULL, /* 
permission */ - NULL, /* smap */ NULL /* revalidate */ }; @@ -134,14 +132,12 @@ struct inode_operations udf_file_inode_operations_adinicb = { udf_get_block, /* get_block */ block_read_full_page, /* readpage */ block_write_full_page, /* writepage */ - block_flushpage, /* flushpage */ #ifdef CONFIG_UDF_RW udf_truncate, /* truncate */ #else NULL, /* truncate */ #endif NULL, /* permission */ - NULL, /* smap */ NULL /* revalidate */ }; diff --git a/fs/udf/symlink.c b/fs/udf/symlink.c index 0e52198f5d4c..f1b076263df4 100644 --- a/fs/udf/symlink.c +++ b/fs/udf/symlink.c @@ -43,25 +43,23 @@ static struct dentry * udf_follow_link(struct dentry * dentry, * symlinks can't do much... */ struct inode_operations udf_symlink_inode_operations = { - NULL, /* no file-operations */ - NULL, /* create */ - NULL, /* lookup */ - NULL, /* link */ - NULL, /* unlink */ - NULL, /* symlink */ - NULL, /* mkdir */ - NULL, /* rmdir */ - NULL, /* mknod */ - NULL, /* rename */ - udf_readlink, /* readlink */ - udf_follow_link,/* follow_link */ + NULL, /* no file-operations */ + NULL, /* create */ + NULL, /* lookup */ + NULL, /* link */ + NULL, /* unlink */ + NULL, /* symlink */ + NULL, /* mkdir */ + NULL, /* rmdir */ + NULL, /* mknod */ + NULL, /* rename */ + udf_readlink, /* readlink */ + udf_follow_link, /* follow_link */ NULL, /* get_block */ - NULL, /* readpage */ - NULL, /* writepage */ - NULL, /* flushpage */ - NULL, /* truncate */ - NULL, /* permission */ - NULL, /* smap */ + NULL, /* readpage */ + NULL, /* writepage */ + NULL, /* truncate */ + NULL, /* permission */ NULL /* revalidate */ }; diff --git a/fs/ufs/dir.c b/fs/ufs/dir.c index 0b23ad37fb35..f2be33cfa5a0 100644 --- a/fs/ufs/dir.c +++ b/fs/ufs/dir.c @@ -210,9 +210,7 @@ struct inode_operations ufs_dir_inode_operations = { NULL, /* get_block */ NULL, /* readpage */ NULL, /* writepage */ - NULL, /* flushpage */ NULL, /* truncate */ ufs_permission, /* permission */ - NULL, /* smap */ NULL /* revalidate */ }; diff --git a/fs/ufs/file.c 
b/fs/ufs/file.c index 33d68ba5d000..b014e6c141db 100644 --- a/fs/ufs/file.c +++ b/fs/ufs/file.c @@ -153,9 +153,7 @@ struct inode_operations ufs_file_inode_operations = { ufs_getfrag_block, /* get_block */ block_read_full_page, /* readpage */ block_write_full_page, /* writepage */ - block_flushpage, /* flushpage */ ufs_truncate, /* truncate */ NULL, /* permission */ - NULL, /* smap */ NULL /* revalidate */ }; diff --git a/fs/ufs/symlink.c b/fs/ufs/symlink.c index 3df5c40a6c3b..519d27b267be 100644 --- a/fs/ufs/symlink.c +++ b/fs/ufs/symlink.c @@ -132,9 +132,7 @@ struct inode_operations ufs_symlink_inode_operations = { NULL, /* get_block */ NULL, /* readpage */ NULL, /* writepage */ - NULL, /* flushpage */ NULL, /* truncate */ NULL, /* permission */ - NULL, /* smap */ NULL /* revalidate */ }; diff --git a/fs/umsdos/dir.c b/fs/umsdos/dir.c index c09c293cf591..670f3fd5d8d0 100644 --- a/fs/umsdos/dir.c +++ b/fs/umsdos/dir.c @@ -834,9 +834,7 @@ struct inode_operations umsdos_dir_inode_operations = NULL, /* get_block */ NULL, /* readpage */ NULL, /* writepage */ - NULL, /* flushpage */ NULL, /* truncate */ NULL, /* permission */ - NULL, /* smap */ NULL, /* revalidate */ }; diff --git a/fs/umsdos/rdir.c b/fs/umsdos/rdir.c index c7bb8fb1bd92..c2c5e2905a32 100644 --- a/fs/umsdos/rdir.c +++ b/fs/umsdos/rdir.c @@ -252,6 +252,5 @@ struct inode_operations umsdos_rdir_inode_operations = NULL, /* get_block */ NULL, /* truncate */ NULL, /* permission */ - NULL, /* smap */ NULL, /* revalidate */ }; diff --git a/fs/umsdos/symlink.c b/fs/umsdos/symlink.c index 8f94230e3519..f6614595ab18 100644 --- a/fs/umsdos/symlink.c +++ b/fs/umsdos/symlink.c @@ -138,10 +138,8 @@ struct inode_operations umsdos_symlink_inode_operations = fat_get_block, /* get_block */ block_read_full_page, /* readpage */ NULL, /* writepage */ - NULL, /* flushpage */ NULL, /* truncate */ NULL, /* permission */ - NULL, /* smap */ NULL /* revalidate */ }; diff --git a/fs/vfat/namei.c b/fs/vfat/namei.c index 
0f23487ba3d3..2ee7aaf0a6dd 100644 --- a/fs/vfat/namei.c +++ b/fs/vfat/namei.c @@ -1205,10 +1205,8 @@ struct inode_operations vfat_dir_inode_operations = { NULL, /* get_block */ NULL, /* readpage */ NULL, /* writepage */ - NULL, /* flushpage */ NULL, /* truncate */ NULL, /* permission */ - NULL, /* smap */ NULL /* revalidate */ }; diff --git a/include/asm-alpha/pgtable.h b/include/asm-alpha/pgtable.h index f2c65567f2cb..a627d50ba431 100644 --- a/include/asm-alpha/pgtable.h +++ b/include/asm-alpha/pgtable.h @@ -221,7 +221,6 @@ extern inline int pgd_present(pgd_t pgd) { return pgd_val(pgd) & _PAGE_VALID; } extern inline void pgd_clear(pgd_t * pgdp) { pgd_val(*pgdp) = 0; } #define page_address(page) ((page)->virtual) -#define __page_address(page) (PAGE_OFFSET + (((page) - mem_map) << PAGE_SHIFT)) /* * The following only work if pte_present() is true. diff --git a/include/asm-arm/pgtable.h b/include/asm-arm/pgtable.h index f22867eecb19..8b25a0e694b8 100644 --- a/include/asm-arm/pgtable.h +++ b/include/asm-arm/pgtable.h @@ -85,7 +85,6 @@ extern void __handle_bad_pmd_kernel(pmd_t *pmd); * Permanent address of a page. */ #define page_address(page) ({ if (!(page)->virtual) BUG(); (page)->virtual; }) -#define __page_address(page) (PAGE_OFFSET + (((page) - mem_map) << PAGE_SHIFT)) #define pages_to_mb(x) ((x) >> (20 - PAGE_SHIFT)) #define pte_page(x) (mem_map + pte_pagenr(x)) diff --git a/include/asm-i386/pgtable.h b/include/asm-i386/pgtable.h index 16e2b48a6f17..c0177923cc1f 100644 --- a/include/asm-i386/pgtable.h +++ b/include/asm-i386/pgtable.h @@ -172,7 +172,6 @@ extern void __handle_bad_pmd_kernel(pmd_t * pmd); * called on a highmem page. 
*/ #define page_address(page) ({ if (!(page)->virtual) BUG(); (page)->virtual; }) -#define __page_address(page) ({ if (PageHighMem(page)) BUG(); PAGE_OFFSET + (((page) - mem_map) << PAGE_SHIFT); }) #define pages_to_mb(x) ((x) >> (20-PAGE_SHIFT)) #define pte_page(x) (mem_map+pte_pagenr(x)) diff --git a/include/asm-ppc/pgtable.h b/include/asm-ppc/pgtable.h index d671e2f6e174..3c1eec8c0657 100644 --- a/include/asm-ppc/pgtable.h +++ b/include/asm-ppc/pgtable.h @@ -275,7 +275,6 @@ extern pte_t * __bad_pagetable(void); * Permanent address of a page. */ #define page_address(page) ({ if (!(page)->virtual) BUG(); (page)->virtual; }) -#define __page_address(page) (PAGE_OFFSET + (((page) - mem_map) << PAGE_SHIFT)) #define pages_to_mb(x) ((x) >> (20-PAGE_SHIFT)) #define pte_page(x) (mem_map+pte_pagenr(x)) diff --git a/include/linux/bootmem.h b/include/linux/bootmem.h index cea5b8801271..4e373d7d041f 100644 --- a/include/linux/bootmem.h +++ b/include/linux/bootmem.h @@ -1,3 +1,6 @@ +/* + * Discontiguous memory support, Kanoj Sarcar, SGI, Nov 1999 + */ #ifndef _LINUX_BOOTMEM_H #define _LINUX_BOOTMEM_H @@ -12,6 +15,18 @@ extern unsigned long max_low_pfn; +/* + * node_bootmem_map is a map pointer - the bits represent all physical + * memory pages (including holes) on the node. 
+ */ +typedef struct bootmem_data { + unsigned long node_boot_start; + unsigned long node_low_pfn; + void *node_bootmem_map; + unsigned long last_offset; + unsigned long last_pos; +} bootmem_data_t; + extern unsigned long __init bootmem_bootmap_pages (unsigned long); extern unsigned long __init init_bootmem (unsigned long addr, unsigned long memend); extern void __init reserve_bootmem (unsigned long addr, unsigned long size); @@ -27,6 +42,18 @@ extern void * __init __alloc_bootmem (unsigned long size, unsigned long align, u __alloc_bootmem((x), PAGE_SIZE, 0) extern unsigned long __init free_all_bootmem (void); +extern unsigned long __init init_bootmem_node (int nid, unsigned long freepfn, unsigned long startpfn, unsigned long endpfn); +extern void __init reserve_bootmem_node (int nid, unsigned long physaddr, unsigned long size); +extern void __init free_bootmem_node (int nid, unsigned long addr, unsigned long size); +extern unsigned long __init free_all_bootmem_node (int nid); +extern void * __init __alloc_bootmem_node (int nid, unsigned long size, unsigned long align, unsigned long goal); +#define alloc_bootmem_node(nid, x) \ + __alloc_bootmem_node((nid), (x), SMP_CACHE_BYTES, __pa(MAX_DMA_ADDRESS)) +#define alloc_bootmem_pages_node(nid, x) \ + __alloc_bootmem_node((nid), (x), PAGE_SIZE, __pa(MAX_DMA_ADDRESS)) +#define alloc_bootmem_low_pages_node(nid, x) \ + __alloc_bootmem_node((nid), (x), PAGE_SIZE, 0) + #endif /* _LINUX_BOOTMEM_H */ diff --git a/include/linux/fs.h b/include/linux/fs.h index 0e7b557b36c2..834c4aa29b29 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -637,7 +637,7 @@ struct inode_operations { /* * the order of these functions within the VFS template has been * changed because SMP locking has changed: from now on all get_block, - * readpage, writepage and flushpage functions are supposed to do + * readpage and writepage functions are supposed to do * whatever locking they need to get proper SMP operation - for * now in most cases this 
means a lock/unlock_kernel at entry/exit. * [The new order is also slightly more logical :)] @@ -651,11 +651,9 @@ struct inode_operations { int (*readpage) (struct dentry *, struct page *); int (*writepage) (struct dentry *, struct page *); - int (*flushpage) (struct inode *, struct page *, unsigned long); void (*truncate) (struct inode *); int (*permission) (struct inode *, int); - int (*smap) (struct inode *,int); int (*revalidate) (struct dentry *); }; @@ -733,7 +731,7 @@ extern inline int locks_verify_area(int read_write, struct inode *inode, asmlinkage long sys_open(const char *, int, int); asmlinkage long sys_close(unsigned int); /* yes, it's really unsigned */ extern int do_close(unsigned int, int); /* yes, it's really unsigned */ -extern int do_truncate(struct dentry *, unsigned long); +extern int do_truncate(struct dentry *, loff_t start); extern int get_unused_fd(void); extern void put_unused_fd(unsigned int); @@ -948,7 +946,7 @@ extern int block_read_full_page(struct dentry *, struct page *); extern int block_write_full_page (struct dentry *, struct page *); extern int block_write_partial_page (struct file *, struct page *, unsigned long, unsigned long, const char *); extern int block_write_cont_page (struct file *, struct page *, unsigned long, unsigned long, const char *); -extern int block_flushpage(struct inode *, struct page *, unsigned long); +extern int block_flushpage(struct page *, unsigned long); extern int generic_file_mmap(struct file *, struct vm_area_struct *); extern ssize_t generic_file_read(struct file *, char *, size_t, loff_t *); @@ -980,7 +978,7 @@ extern ssize_t block_write(struct file *, const char *, size_t, loff_t *); extern int block_fsync(struct file *, struct dentry *); extern int file_fsync(struct file *, struct dentry *); -extern int generic_buffer_fdatasync(struct inode *inode, unsigned long start, unsigned long end); +extern int generic_buffer_fdatasync(struct inode *inode, unsigned long start_idx, unsigned long end_idx); 
extern int inode_change_ok(struct inode *, struct iattr *); extern void inode_setattr(struct inode *, struct iattr *); diff --git a/include/linux/highmem.h b/include/linux/highmem.h index 1eb622eb2f10..2e31d77f22eb 100644 --- a/include/linux/highmem.h +++ b/include/linux/highmem.h @@ -23,7 +23,7 @@ extern struct buffer_head * create_bounce(int rw, struct buffer_head * bh_orig); extern inline unsigned int nr_free_highpages(void) { return 0; } #define prepare_highmem_swapout(page) page #define replace_with_highmem(page) page -#define kmap(page) __page_address(page) +#define kmap(page) page_address(page) #define kunmap(page) do { } while (0) #endif /* CONFIG_HIGHMEM */ diff --git a/include/linux/mm.h b/include/linux/mm.h index ddb92209d05a..fabab7ab7072 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -9,6 +9,7 @@ #include #include #include +#include extern unsigned long max_mapnr; extern unsigned long num_physpages; @@ -119,8 +120,6 @@ typedef struct { unsigned long val; } swp_entry_t; -struct zone_struct; - /* * Try to keep the most commonly accessed fields in single cache lines * here (16 bytes or greater). This ordering should be particularly @@ -285,75 +284,6 @@ typedef struct page { extern mem_map_t * mem_map; -/* - * Free memory management - zoned buddy allocator. 
- */ - -#if CONFIG_AP1000 -/* the AP+ needs to allocate 8MB contiguous, aligned chunks of ram - for the ring buffers */ -#define MAX_ORDER 12 -#else -#define MAX_ORDER 10 -#endif - -typedef struct free_area_struct { - struct list_head free_list; - unsigned int * map; -} free_area_t; - -typedef struct zone_struct { - /* - * Commonly accessed fields: - */ - spinlock_t lock; - unsigned long offset; - unsigned long free_pages; - int low_on_memory; - unsigned long pages_low, pages_high; - - /* - * free areas of different sizes - */ - free_area_t free_area[MAX_ORDER]; - - /* - * rarely used fields: - */ - char * name; - unsigned long size; -} zone_t; - -#define ZONE_DMA 0 -#define ZONE_NORMAL 1 -#define ZONE_HIGHMEM 2 - -/* - * NUMA architectures will have more: - */ -#define MAX_NR_ZONES 3 - -/* - * One allocation request operates on a zonelist. A zonelist - * is a list of zones, the first one is the 'goal' of the - * allocation, the other zones are fallback zones, in decreasing - * priority. On NUMA we want to fall back on other CPU's zones - * as well. - * - * Right now a zonelist takes up less than a cacheline. We never - * modify it apart from boot-up, and only a few indices are used, - * so despite the zonelist table being relatively big, the cache - * footprint of this construct is very small. - */ -typedef struct zonelist_struct { - zone_t * zones [MAX_NR_ZONES+1]; // NULL delimited - int gfp_mask; -} zonelist_t; - -#define NR_GFPINDEX 0x100 - -extern zonelist_t zonelists [NR_GFPINDEX]; - /* * There is only one page-allocator function, and two main namespaces to * it. The alloc_page*() variants return 'struct page *' and as such @@ -361,19 +291,24 @@ extern zonelist_t zonelists [NR_GFPINDEX]; * virtual kernel addresses to the allocated page(s). 
*/ extern struct page * FASTCALL(__alloc_pages(zonelist_t *zonelist, unsigned long order)); +extern struct page * alloc_pages_node(int nid, int gfp_mask, unsigned long order); +#ifndef CONFIG_DISCONTIGMEM extern inline struct page * alloc_pages(int gfp_mask, unsigned long order) { /* temporary check. */ - if (zonelists[gfp_mask].gfp_mask != (gfp_mask)) + if (contig_page_data.node_zonelists[gfp_mask].gfp_mask != (gfp_mask)) BUG(); /* * Gets optimized away by the compiler. */ if (order >= MAX_ORDER) return NULL; - return __alloc_pages(zonelists+(gfp_mask), order); + return __alloc_pages(contig_page_data.node_zonelists+(gfp_mask), order); } +#else /* !CONFIG_DISCONTIGMEM */ +extern struct page * alloc_pages(int gfp_mask, unsigned long order); +#endif /* !CONFIG_DISCONTIGMEM */ #define alloc_page(gfp_mask) \ alloc_pages(gfp_mask, 0) @@ -385,7 +320,7 @@ extern inline unsigned long __get_free_pages (int gfp_mask, unsigned long order) page = alloc_pages(gfp_mask, order); if (!page) return 0; - return __page_address(page); + return page_address(page); } #define __get_free_page(gfp_mask) \ @@ -425,8 +360,12 @@ extern inline void __free_pages(struct page *page, unsigned long order) extern inline void free_pages(unsigned long addr, unsigned long order) { - unsigned long map_nr = MAP_NR(addr); + unsigned long map_nr; +#ifdef CONFIG_DISCONTIGMEM + if (addr == 0) return; +#endif + map_nr = MAP_NR(addr); if (map_nr < max_mapnr) __free_pages(mem_map + map_nr, order); } @@ -434,6 +373,7 @@ extern inline void free_pages(unsigned long addr, unsigned long order) #define free_page(addr) free_pages((addr),0) extern void show_free_areas(void); +extern void show_free_areas_node(int nid); extern struct page * put_dirty_page(struct task_struct * tsk, struct page *page, unsigned long address); @@ -444,7 +384,7 @@ extern int copy_page_range(struct mm_struct *dst, struct mm_struct *src, struct extern int remap_page_range(unsigned long from, unsigned long to, unsigned long size, pgprot_t prot); 
extern int zeromap_page_range(unsigned long from, unsigned long size, pgprot_t prot); -extern void vmtruncate(struct inode * inode, unsigned long offset); +extern void vmtruncate(struct inode * inode, loff_t offset); extern int handle_mm_fault(struct task_struct *tsk,struct vm_area_struct *vma, unsigned long address, int write_access); extern int make_pages_present(unsigned long addr, unsigned long end); extern int access_process_vm(struct task_struct *tsk, unsigned long addr, void *buf, int len, int write); @@ -456,6 +396,8 @@ extern int check_pgt_cache(void); extern void paging_init(void); extern void free_area_init(unsigned int * zones_size); +extern void free_area_init_node(int nid, pg_data_t *pgdat, + unsigned int * zones_size, unsigned long zone_start_paddr); extern void mem_init(void); extern void show_mem(void); extern void oom(struct task_struct * tsk); @@ -470,8 +412,23 @@ extern void build_mmap_avl(struct mm_struct *); extern void exit_mmap(struct mm_struct *); extern unsigned long get_unmapped_area(unsigned long, unsigned long); -extern unsigned long do_mmap(struct file *, unsigned long, unsigned long, - unsigned long, unsigned long, unsigned long); +extern unsigned long do_mmap_pgoff(struct file *file, unsigned long addr, + unsigned long len, unsigned long prot, + unsigned long flag, unsigned long pgoff); + +extern inline unsigned long do_mmap(struct file *file, unsigned long addr, + unsigned long len, unsigned long prot, + unsigned long flag, unsigned long offset) +{ + unsigned long ret = -EINVAL; + if ((offset + PAGE_ALIGN(len)) < offset) + goto out; + if (!(offset & ~PAGE_MASK)) + ret = do_mmap_pgoff(file, addr, len, prot, flag, offset >> PAGE_SHIFT); +out: + return ret; +} + extern int do_munmap(unsigned long, size_t); extern unsigned long do_brk(unsigned long, unsigned long); @@ -479,8 +436,7 @@ extern unsigned long do_brk(unsigned long, unsigned long); extern void remove_inode_page(struct page *); extern unsigned long page_unuse(struct page *); 
extern int shrink_mmap(int, int); -extern void truncate_inode_pages(struct inode *, unsigned long); -extern void put_cached_page(unsigned long); +extern void truncate_inode_pages(struct inode *, loff_t); /* * GFP bitmasks.. diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h new file mode 100644 index 000000000000..79c206282dc6 --- /dev/null +++ b/include/linux/mmzone.h @@ -0,0 +1,115 @@ +#ifndef _LINUX_MMZONE_H +#define _LINUX_MMZONE_H + +#ifdef __KERNEL__ +#ifndef __ASSEMBLY__ + +#include +#include +#include + +/* + * Free memory management - zoned buddy allocator. + */ + +#if CONFIG_AP1000 +/* the AP+ needs to allocate 8MB contiguous, aligned chunks of ram + for the ring buffers */ +#define MAX_ORDER 12 +#else +#define MAX_ORDER 10 +#endif + +typedef struct free_area_struct { + struct list_head free_list; + unsigned int * map; +} free_area_t; + +typedef struct zone_struct { + /* + * Commonly accessed fields: + */ + spinlock_t lock; + unsigned long offset; + unsigned long free_pages; + int low_on_memory; + unsigned long pages_low, pages_high; + + /* + * free areas of different sizes + */ + free_area_t free_area[MAX_ORDER]; + + /* + * rarely used fields: + */ + char * name; + unsigned long size; +} zone_t; + +#define ZONE_DMA 0 +#define ZONE_NORMAL 1 +#define ZONE_HIGHMEM 2 +#define MAX_NR_ZONES 3 + +/* + * One allocation request operates on a zonelist. A zonelist + * is a list of zones, the first one is the 'goal' of the + * allocation, the other zones are fallback zones, in decreasing + * priority. + * + * Right now a zonelist takes up less than a cacheline. We never + * modify it apart from boot-up, and only a few indices are used, + * so despite the zonelist table being relatively big, the cache + * footprint of this construct is very small. 
+ */ +typedef struct zonelist_struct { + zone_t * zones [MAX_NR_ZONES+1]; // NULL delimited + int gfp_mask; +} zonelist_t; + +#define NR_GFPINDEX 0x100 + +struct bootmem_data; +typedef struct pglist_data { + zone_t node_zones[MAX_NR_ZONES]; + zonelist_t node_zonelists[NR_GFPINDEX]; + struct page *node_mem_map; + unsigned long *valid_addr_bitmap; + struct bootmem_data *bdata; +} pg_data_t; + +extern int numnodes; + +#ifndef CONFIG_DISCONTIGMEM + +extern pg_data_t contig_page_data; + +#define NODE_DATA(nid) (&contig_page_data) +#define NODE_MEM_MAP(nid) mem_map + +#else /* !CONFIG_DISCONTIGMEM */ + +#include + +#endif /* !CONFIG_DISCONTIGMEM */ + +#define MAP_ALIGN(x) ((((x) % sizeof(mem_map_t)) == 0) ? (x) : ((x) + \ + sizeof(mem_map_t) - ((x) % sizeof(mem_map_t)))) + +#ifdef CONFIG_DISCONTIGMEM + +#define LOCAL_MAP_NR(kvaddr) \ + (((unsigned long)(kvaddr)-LOCAL_BASE_ADDR((kvaddr))) >> PAGE_SHIFT) +#define MAP_NR(kaddr) (LOCAL_MAP_NR((kaddr)) + \ + (((unsigned long)ADDR_TO_MAPBASE((kaddr)) - PAGE_OFFSET) / \ + sizeof(mem_map_t))) +#define kern_addr_valid(addr) ((KVADDR_TO_NID((unsigned long)addr) >= \ + numnodes) ? 
0 : (test_bit(LOCAL_MAP_NR((addr)), \ + NODE_DATA(KVADDR_TO_NID((unsigned long)addr))->valid_addr_bitmap))) + +#endif /* CONFIG_DISCONTIGMEM */ + +#endif /* !__ASSEMBLY__ */ +#endif /* __KERNEL__ */ +#endif /* _LINUX_MMZONE_H */ diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h index 2b852a1dc91b..53bc365d28d5 100644 --- a/include/linux/pagemap.h +++ b/include/linux/pagemap.h @@ -80,7 +80,6 @@ extern void lock_page(struct page *page); extern void __add_page_to_hash_queue(struct page * page, struct page **p); extern void add_to_page_cache(struct page * page, struct address_space *mapping, unsigned long index); -extern int add_to_page_cache_unique(struct page * page, struct address_space *mapping, unsigned long index, struct page **hash); extern inline void add_page_to_hash_queue(struct page * page, struct inode * inode, unsigned long index) { diff --git a/include/linux/swap.h b/include/linux/swap.h index 1eb17e3ec2a5..18ea45e63b1e 100644 --- a/include/linux/swap.h +++ b/include/linux/swap.h @@ -106,6 +106,7 @@ extern struct page * read_swap_cache_async(swp_entry_t, int); */ extern void __delete_from_swap_cache(struct page *page); extern void delete_from_swap_cache(struct page *page); +extern void delete_from_swap_cache_nolock(struct page *page); extern void free_page_and_swap_cache(struct page *page); /* linux/mm/swapfile.c */ diff --git a/kernel/ksyms.c b/kernel/ksyms.c index 928f04a6b7b3..8fe894c5445f 100644 --- a/kernel/ksyms.c +++ b/kernel/ksyms.c @@ -39,6 +39,7 @@ #include #include #include +#include #include #include @@ -83,7 +84,7 @@ EXPORT_SYMBOL(get_option); EXPORT_SYMBOL(get_options); /* process memory management */ -EXPORT_SYMBOL(do_mmap); +EXPORT_SYMBOL(do_mmap_pgoff); EXPORT_SYMBOL(do_munmap); EXPORT_SYMBOL(do_brk); EXPORT_SYMBOL(exit_mm); @@ -93,8 +94,11 @@ EXPORT_SYMBOL(exit_sighand); /* internal kernel memory management */ EXPORT_SYMBOL(__alloc_pages); +EXPORT_SYMBOL(alloc_pages_node); EXPORT_SYMBOL(__free_pages_ok); 
-EXPORT_SYMBOL(zonelists); +#ifndef CONFIG_DISCONTIGMEM +EXPORT_SYMBOL(contig_page_data); +#endif EXPORT_SYMBOL(kmem_find_general_cachep); EXPORT_SYMBOL(kmem_cache_create); EXPORT_SYMBOL(kmem_cache_destroy); @@ -184,7 +188,6 @@ EXPORT_SYMBOL(block_read_full_page); EXPORT_SYMBOL(block_write_full_page); EXPORT_SYMBOL(block_write_partial_page); EXPORT_SYMBOL(block_write_cont_page); -EXPORT_SYMBOL(block_flushpage); EXPORT_SYMBOL(generic_file_read); EXPORT_SYMBOL(do_generic_file_read); EXPORT_SYMBOL(generic_file_write); @@ -199,7 +202,6 @@ EXPORT_SYMBOL(posix_block_lock); EXPORT_SYMBOL(posix_unblock_lock); EXPORT_SYMBOL(locks_mandatory_area); EXPORT_SYMBOL(dput); -EXPORT_SYMBOL(put_cached_page); EXPORT_SYMBOL(is_root_busy); EXPORT_SYMBOL(prune_dcache); EXPORT_SYMBOL(shrink_dcache_sb); @@ -212,7 +214,6 @@ EXPORT_SYMBOL(vfs_unlink); EXPORT_SYMBOL(vfs_rename); EXPORT_SYMBOL(__pollwait); EXPORT_SYMBOL(ROOT_DEV); -EXPORT_SYMBOL(add_to_page_cache_unique); EXPORT_SYMBOL(__find_get_page); EXPORT_SYMBOL(__find_lock_page); EXPORT_SYMBOL(grab_cache_page); diff --git a/kernel/module.c b/kernel/module.c index 0a7d5a42dfe3..9ceb36f9e06d 100644 --- a/kernel/module.c +++ b/kernel/module.c @@ -111,11 +111,9 @@ sys_create_module(const char *name_user, size_t size) long namelen, error; struct module *mod; + if (!capable(CAP_SYS_MODULE)) + return -EPERM; lock_kernel(); - if (!capable(CAP_SYS_MODULE)) { - error = -EPERM; - goto err0; - } if ((namelen = get_mod_name(name_user, &name)) < 0) { error = namelen; goto err0; @@ -162,13 +160,13 @@ sys_init_module(const char *name_user, struct module *mod_user) { struct module mod_tmp, *mod; char *name, *n_name; - long namelen, n_namelen, i, error = -EPERM; + long namelen, n_namelen, i, error; unsigned long mod_user_size; struct module_ref *dep; - lock_kernel(); if (!capable(CAP_SYS_MODULE)) - goto err0; + return -EPERM; + lock_kernel(); if ((namelen = get_mod_name(name_user, &name)) < 0) { error = namelen; goto err0; @@ -354,13 +352,13 @@ 
sys_delete_module(const char *name_user) { struct module *mod, *next; char *name; - long error = -EPERM; + long error; int something_changed; - lock_kernel(); if (!capable(CAP_SYS_MODULE)) - goto out; + return -EPERM; + lock_kernel(); if (name_user) { if ((error = get_mod_name(name_user, &name)) < 0) goto out; diff --git a/kernel/sysctl.c b/kernel/sysctl.c index 26ca3b98eddb..23be26489811 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -135,10 +135,8 @@ struct inode_operations proc_sys_inode_operations = NULL, /* get_block */ NULL, /* readpage */ NULL, /* writepage */ - NULL, /* flushpage */ NULL, /* truncate */ proc_sys_permission, /* permission */ - NULL, /* smap */ NULL /* revalidate */ }; diff --git a/mm/Makefile b/mm/Makefile index 31c1a6231644..56e93693b3b5 100644 --- a/mm/Makefile +++ b/mm/Makefile @@ -10,7 +10,7 @@ O_TARGET := mm.o O_OBJS := memory.o mmap.o filemap.o mprotect.o mlock.o mremap.o \ vmalloc.o slab.o bootmem.o swap.o vmscan.o page_io.o \ - page_alloc.o swap_state.o swapfile.o + page_alloc.o swap_state.o swapfile.o numa.o ifeq ($(CONFIG_HIGHMEM),y) O_OBJS += highmem.o diff --git a/mm/bootmem.c b/mm/bootmem.c index e0ab193dac96..6b13b9447621 100644 --- a/mm/bootmem.c +++ b/mm/bootmem.c @@ -2,6 +2,7 @@ * linux/mm/initmem.c * * Copyright (C) 1999 Ingo Molnar + * Discontiguous memory support, Kanoj Sarcar, SGI, Nov 1999 * * simple boot-time physical memory area allocator and * free memory collector. It's used to deal with reserved @@ -15,19 +16,15 @@ #include #include #include +#include #include /* - * Pointer to a bitmap - the bits represent all physical memory pages - * from physical address 0 to physical address end_mem. - * * Access to this subsystem has to be serialized externally. 
(this is * true for the boot process anyway) */ unsigned long max_low_pfn; -static void * bootmem_map = NULL; - /* return the number of _pages_ that will be allocated for the boot bitmap */ unsigned long __init bootmem_bootmap_pages (unsigned long pages) { @@ -43,46 +40,51 @@ unsigned long __init bootmem_bootmap_pages (unsigned long pages) /* * Called once to set up the allocator itself. */ -unsigned long __init init_bootmem (unsigned long start, unsigned long pages) +static unsigned long __init init_bootmem_core (bootmem_data_t *bdata, + unsigned long mapstart, unsigned long start, unsigned long end) { - unsigned long mapsize = (pages+7)/8; + unsigned long mapsize = ((end - start)+7)/8; - bootmem_map = phys_to_virt(start << PAGE_SHIFT); - max_low_pfn = pages; + bdata->node_bootmem_map = phys_to_virt(mapstart << PAGE_SHIFT); + bdata->node_boot_start = (start << PAGE_SHIFT); + bdata->node_low_pfn = end; /* * Initially all pages are reserved - setup_arch() has to * register free RAM areas explicitly. */ - memset(bootmem_map, 0xff, mapsize); + memset(bdata->node_bootmem_map, 0xff, mapsize); return mapsize; } /* - * Marks a particular physical memory range as usable. Usable RAM + * Marks a particular physical memory range as unallocatable. Usable RAM * might be used for boot-time allocations - or it might get added * to the free page pool later on. */ -void __init reserve_bootmem (unsigned long addr, unsigned long size) +static void __init reserve_bootmem_core(bootmem_data_t *bdata, unsigned long addr, unsigned long size) { unsigned long i; /* * round up, partially reserved pages are considered * fully reserved. 
*/ + unsigned long sidx = (addr - bdata->node_boot_start)/PAGE_SIZE; + unsigned long eidx = (addr + size - bdata->node_boot_start + + PAGE_SIZE-1)/PAGE_SIZE; unsigned long end = (addr + size + PAGE_SIZE-1)/PAGE_SIZE; if (!size) BUG(); - if (end > max_low_pfn) + if (end > bdata->node_low_pfn) BUG(); - for (i = addr/PAGE_SIZE; i < end; i++) - if (test_and_set_bit(i, bootmem_map)) + for (i = sidx; i < eidx; i++) + if (test_and_set_bit(i, bdata->node_bootmem_map)) BUG(); } -void __init free_bootmem (unsigned long addr, unsigned long size) +static void __init free_bootmem_core(bootmem_data_t *bdata, unsigned long addr, unsigned long size) { unsigned long i; unsigned long start; @@ -90,19 +92,22 @@ void __init free_bootmem (unsigned long addr, unsigned long size) * round down end of usable mem, partially free pages are * considered reserved. */ + unsigned long sidx; + unsigned long eidx = (addr + size - bdata->node_boot_start)/PAGE_SIZE; unsigned long end = (addr + size)/PAGE_SIZE; if (!size) BUG(); - if (end > max_low_pfn) + if (end > bdata->node_low_pfn) BUG(); /* * Round up the beginning of the address. */ start = (addr + PAGE_SIZE-1) / PAGE_SIZE; + sidx = start - (bdata->node_boot_start/PAGE_SIZE); - for (i = start; i < end; i++) { - if (!test_and_clear_bit(i, bootmem_map)) + for (i = sidx; i < eidx; i++) { + if (!test_and_clear_bit(i, bdata->node_bootmem_map)) BUG(); } } @@ -116,19 +121,20 @@ void __init free_bootmem (unsigned long addr, unsigned long size) * * On low memory boxes we get it right in 100% of the cases. */ -static unsigned long last_pos = 0; -static unsigned long last_offset = 0; /* * alignment has to be a power of 2 value. 
*/ -void * __init __alloc_bootmem (unsigned long size, unsigned long align, unsigned long goal) +static void * __init __alloc_bootmem_core (bootmem_data_t *bdata, + unsigned long size, unsigned long align, unsigned long goal) { int area = 0; unsigned long i, start = 0, reserved; void *ret; unsigned long offset, remaining_size; unsigned long areasize, preferred; + unsigned long eidx = bdata->node_low_pfn - (bdata->node_boot_start >> + PAGE_SHIFT); if (!size) BUG(); @@ -136,18 +142,17 @@ void * __init __alloc_bootmem (unsigned long size, unsigned long align, unsigned * We try to allocate bootmem pages above 'goal' * first, then we try to allocate lower pages. */ - if (goal) { - preferred = goal >> PAGE_SHIFT; - if (preferred >= max_low_pfn) - preferred = 0; + if (goal && (goal >= bdata->node_boot_start) && + ((goal >> PAGE_SHIFT) < bdata->node_low_pfn)) { + preferred = (goal - bdata->node_boot_start) >> PAGE_SHIFT; } else preferred = 0; areasize = (size+PAGE_SIZE-1)/PAGE_SIZE; restart_scan: - for (i = preferred; i < max_low_pfn; i++) { - reserved = test_bit(i, bootmem_map); + for (i = preferred; i < eidx; i++) { + reserved = test_bit(i, bdata->node_bootmem_map); if (!reserved) { if (!area) { area = 1; @@ -169,7 +174,7 @@ restart_scan: */ BUG(); found: - if (start >= max_low_pfn) + if (start >= eidx) BUG(); /* @@ -177,8 +182,8 @@ found: * of this allocation's buffer? If yes then we can 'merge' * the previous partial page with this allocation. 
*/ - if (last_offset && (last_pos+1 == start)) { - offset = (last_offset+align-1) & ~(align-1); + if (bdata->last_offset && (bdata->last_pos+1 == start)) { + offset = (bdata->last_offset+align-1) & ~(align-1); if (offset > PAGE_SIZE) BUG(); remaining_size = PAGE_SIZE-offset; @@ -187,46 +192,50 @@ found: if (size < remaining_size) { areasize = 0; // last_pos unchanged - last_offset = offset+size; - ret = phys_to_virt(last_pos*PAGE_SIZE + offset); + bdata->last_offset = offset+size; + ret = phys_to_virt(bdata->last_pos*PAGE_SIZE + offset + + bdata->node_boot_start); } else { remaining_size = size - remaining_size; areasize = (remaining_size+PAGE_SIZE-1)/PAGE_SIZE; - ret = phys_to_virt(last_pos*PAGE_SIZE + offset); - last_pos = start+areasize-1; - last_offset = remaining_size; + ret = phys_to_virt(bdata->last_pos*PAGE_SIZE + offset + + bdata->node_boot_start); + bdata->last_pos = start+areasize-1; + bdata->last_offset = remaining_size; } - last_offset &= ~PAGE_MASK; + bdata->last_offset &= ~PAGE_MASK; } else { - last_pos = start + areasize - 1; - last_offset = size & ~PAGE_MASK; - ret = phys_to_virt(start * PAGE_SIZE); + bdata->last_pos = start + areasize - 1; + bdata->last_offset = size & ~PAGE_MASK; + ret = phys_to_virt(start * PAGE_SIZE + bdata->node_boot_start); } /* * Reserve the area now: */ for (i = start; i < start+areasize; i++) - if (test_and_set_bit(i, bootmem_map)) + if (test_and_set_bit(i, bdata->node_bootmem_map)) BUG(); memset(ret, 0, size); return ret; } -unsigned long __init free_all_bootmem (void) +static unsigned long __init free_all_bootmem_core(int nid, bootmem_data_t *bdata) { struct page * page; unsigned long i, count, total = 0; + unsigned long idx; - if (!bootmem_map) BUG(); + if (!bdata->node_bootmem_map) BUG(); - page = mem_map; + page = NODE_MEM_MAP(nid); count = 0; - for (i = 0; i < max_low_pfn; i++, page++) { - if (!test_bit(i, bootmem_map)) { + idx = bdata->node_low_pfn - (bdata->node_boot_start >> PAGE_SHIFT); + for (i = 0; i < idx; 
i++, page++) { + if (!test_bit(i, bdata->node_bootmem_map)) { count++; ClearPageReserved(page); set_page_count(page, 1); - if (i >= (virt_to_phys((char *)MAX_DMA_ADDRESS) >> PAGE_SHIFT)) + if ((i+(bdata->node_boot_start >> PAGE_SHIFT)) >= (virt_to_phys((char *)MAX_DMA_ADDRESS) >> PAGE_SHIFT)) clear_bit(PG_DMA, &page->flags); __free_page(page); } @@ -237,16 +246,72 @@ unsigned long __init free_all_bootmem (void) * Now free the allocator bitmap itself, it's not * needed anymore: */ - page = mem_map + MAP_NR(bootmem_map); + page = mem_map + MAP_NR(bdata->node_bootmem_map); count = 0; - for (i = 0; i < (max_low_pfn/8 + PAGE_SIZE-1)/PAGE_SIZE; i++,page++) { + for (i = 0; i < ((bdata->node_low_pfn-(bdata->node_boot_start >> PAGE_SHIFT))/8 + PAGE_SIZE-1)/PAGE_SIZE; i++,page++) { count++; ClearPageReserved(page); set_page_count(page, 1); __free_page(page); } total += count; - bootmem_map = NULL; + bdata->node_bootmem_map = NULL; return total; } + +unsigned long __init init_bootmem_node (int nid, unsigned long freepfn, unsigned long startpfn, unsigned long endpfn) +{ + return(init_bootmem_core(NODE_DATA(nid)->bdata, freepfn, startpfn, endpfn)); +} + +void __init reserve_bootmem_node (int nid, unsigned long physaddr, unsigned long size) +{ + reserve_bootmem_core(NODE_DATA(nid)->bdata, physaddr, size); +} + +void __init free_bootmem_node (int nid, unsigned long physaddr, unsigned long size) +{ + return(free_bootmem_core(NODE_DATA(nid)->bdata, physaddr, size)); +} + +unsigned long __init free_all_bootmem_node (int nid) +{ + return(free_all_bootmem_core(nid, NODE_DATA(nid)->bdata)); +} + +unsigned long __init init_bootmem (unsigned long start, unsigned long pages) +{ + max_low_pfn = pages; + return(init_bootmem_core(NODE_DATA(0)->bdata, start, 0, pages)); +} + +void __init reserve_bootmem (unsigned long addr, unsigned long size) +{ + reserve_bootmem_core(NODE_DATA(0)->bdata, addr, size); +} + +void __init free_bootmem (unsigned long addr, unsigned long size) +{ + 
return(free_bootmem_core(NODE_DATA(0)->bdata, addr, size)); +} + +unsigned long __init free_all_bootmem (void) +{ + return(free_all_bootmem_core(0, NODE_DATA(0)->bdata)); +} + +void * __init __alloc_bootmem (unsigned long size, unsigned long align, unsigned long goal) +{ + /* + * In the discontigmem case, all non-node specific allocations come + * from the first node, node 0. + */ + return(__alloc_bootmem_core(NODE_DATA(0)->bdata, size, align, goal)); +} + +void * __init __alloc_bootmem_node (int nid, unsigned long size, unsigned long align, unsigned long goal) +{ + return(__alloc_bootmem_core(NODE_DATA(nid)->bdata, size, align, goal)); +} + diff --git a/mm/filemap.c b/mm/filemap.c index e85c608ee9c8..2a3380504c0b 100644 --- a/mm/filemap.c +++ b/mm/filemap.c @@ -124,13 +124,14 @@ void invalidate_inode_pages(struct inode * inode) * Truncate the page cache at a set offset, removing the pages * that are beyond that offset (and zeroing out partial pages). */ -void truncate_inode_pages(struct inode * inode, unsigned long start) +void truncate_inode_pages(struct inode * inode, loff_t lstart) { struct list_head *head, *curr; struct page * page; - unsigned partial = start & (PAGE_CACHE_SIZE - 1); + unsigned partial = lstart & (PAGE_CACHE_SIZE - 1); + unsigned long start; - start = (start + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT; + start = (lstart + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT; repeat: head = &inode->i_data.pages; @@ -151,8 +152,7 @@ repeat: lock_page(page); - if (!inode->i_op->flushpage || - inode->i_op->flushpage(inode, page, 0)) + if (!page->buffers || block_flushpage(page, 0)) lru_cache_del(page); /* @@ -195,8 +195,8 @@ repeat: lock_page(page); memclear_highpage_flush(page, partial, PAGE_CACHE_SIZE-partial); - if (inode->i_op->flushpage) - inode->i_op->flushpage(inode, page, partial); + if (page->buffers) + block_flushpage(page, partial); partial = 0; @@ -456,10 +456,8 @@ static int do_buffer_fdatasync(struct inode *inode, unsigned long start, unsigne * 
Two-stage data sync: first start the IO, then go back and * collect the information.. */ -int generic_buffer_fdatasync(struct inode *inode, unsigned long start, unsigned long end) +int generic_buffer_fdatasync(struct inode *inode, unsigned long start_idx, unsigned long end_idx) { - unsigned long start_idx = start >> PAGE_CACHE_SHIFT; - unsigned long end_idx = (end + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT; int retval; retval = do_buffer_fdatasync(inode, start_idx, end_idx, writeout_one_page); @@ -497,7 +495,7 @@ void add_to_page_cache(struct page * page, struct address_space * mapping, unsig spin_unlock(&pagecache_lock); } -int add_to_page_cache_unique(struct page * page, +static int add_to_page_cache_unique(struct page * page, struct address_space *mapping, unsigned long offset, struct page **hash) { @@ -1434,22 +1432,19 @@ page_not_uptodate: * if the disk is full. */ static inline int do_write_page(struct inode * inode, struct file * file, - struct page * page, unsigned long offset) + struct page * page, unsigned long index) { int retval; - unsigned long size; int (*writepage) (struct dentry *, struct page *); - size = (offset << PAGE_CACHE_SHIFT) + PAGE_CACHE_SIZE; /* refuse to extend file size.. */ if (S_ISREG(inode->i_mode)) { - if (size > inode->i_size) - size = inode->i_size; + unsigned long size_idx = (inode->i_size + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT; + /* Ho humm.. We should have tested for this earlier */ - if (size < offset) + if (size_idx <= index) return -EIO; } - retval = -EIO; writepage = inode->i_op->writepage; lock_page(page); @@ -1460,7 +1455,7 @@ static inline int do_write_page(struct inode * inode, struct file * file, } static int filemap_write_page(struct file *file, - unsigned long offset, + unsigned long index, struct page * page, int wait) { @@ -1477,7 +1472,7 @@ static int filemap_write_page(struct file *file, * vma/file is guaranteed to exist in the unmap/sync cases because * mmap_sem is held. 
*/ - result = do_write_page(inode, file, page, offset); + result = do_write_page(inode, file, page, index); return result; } @@ -1949,24 +1944,6 @@ out: return err; } -/* - * Support routines for directory caching using the page cache. - */ - -/* - * Unlock and free a page. - */ -void put_cached_page(unsigned long addr) -{ - struct page * page = page_cache_entry(addr); - - UnlockPage(page); - if (page_count(page) != 2) - panic("put_cached_page: page count=%d\n", - page_count(page)); - page_cache_release(page); -} - void __init page_cache_init(unsigned long mempages) { unsigned long htable_size, order; diff --git a/mm/memory.c b/mm/memory.c index 61a8dd0a4434..88232ba05df9 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -790,11 +790,19 @@ static int do_wp_page(struct task_struct * tsk, struct vm_area_struct * vma, */ switch (page_count(old_page)) { case 2: - if (!PageSwapCache(old_page)) + /* + * Lock the page so that no one can look it up from + * the swap cache, grab a reference and start using it. + * Can not do lock_page, holding page_table_lock. + */ + if (!PageSwapCache(old_page) || TryLockPage(old_page)) break; - if (swap_count(old_page) != 1) + if (is_page_shared(old_page)) { + UnlockPage(old_page); break; - delete_from_swap_cache(old_page); + } + delete_from_swap_cache_nolock(old_page); + UnlockPage(old_page); /* FallThrough */ case 1: flush_cache_page(vma, address); @@ -885,7 +893,7 @@ static void partial_clear(struct vm_area_struct *vma, unsigned long address) * between the file and the memory map for a potential last * incomplete page. Ugly, but necessary. 
*/ -void vmtruncate(struct inode * inode, unsigned long offset) +void vmtruncate(struct inode * inode, loff_t offset) { unsigned long partial, pgoff; struct vm_area_struct * mpnt; @@ -895,10 +903,8 @@ void vmtruncate(struct inode * inode, unsigned long offset) if (!inode->i_mmap) goto out_unlock; - partial = offset & (PAGE_CACHE_SIZE - 1); - pgoff = offset >> PAGE_CACHE_SHIFT; - if (partial) - pgoff ++; + pgoff = (offset + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT; + partial = (unsigned long)offset & (PAGE_CACHE_SIZE - 1); mpnt = inode->i_mmap; do { @@ -976,6 +982,7 @@ static int do_swap_page(struct task_struct * tsk, struct vm_area_struct * vma, unsigned long address, pte_t * page_table, swp_entry_t entry, int write_access) { + int dograb = 0; struct page *page = lookup_swap_cache(entry); pte_t pte; @@ -992,17 +999,26 @@ static int do_swap_page(struct task_struct * tsk, vma->vm_mm->rss++; tsk->min_flt++; - swap_free(entry); pte = mk_pte(page, vma->vm_page_prot); set_bit(PG_swap_entry, &page->flags); + + /* + * Freeze the "shared"ness of the page, ie page_count + swap_count. + * Must lock page before transferring our swap count to already + * obtained page count. 
+ */ + lock_page(page); + swap_free(entry); if (write_access && !is_page_shared(page)) { - delete_from_swap_cache(page); + delete_from_swap_cache_nolock(page); page = replace_with_highmem(page); pte = mk_pte(page, vma->vm_page_prot); pte = pte_mkwrite(pte_mkdirty(pte)); } + UnlockPage(page); + set_pte(page_table, pte); /* No need to invalidate - it was non-present before */ update_mmu_cache(vma, address, pte); diff --git a/mm/mmap.c b/mm/mmap.c index 9ca811351db2..822c654472af 100644 --- a/mm/mmap.c +++ b/mm/mmap.c @@ -160,8 +160,8 @@ static inline unsigned long vm_flags(unsigned long prot, unsigned long flags) #undef _trans } -unsigned long do_mmap(struct file * file, unsigned long addr, unsigned long len, - unsigned long prot, unsigned long flags, unsigned long off) +unsigned long do_mmap_pgoff(struct file * file, unsigned long addr, unsigned long len, + unsigned long prot, unsigned long flags, unsigned long pgoff) { struct mm_struct * mm = current->mm; struct vm_area_struct * vma; @@ -176,15 +176,10 @@ unsigned long do_mmap(struct file * file, unsigned long addr, unsigned long len, if (len > TASK_SIZE || addr > TASK_SIZE-len) return -EINVAL; - if (off & ~PAGE_MASK) - return -EINVAL; - /* offset overflow? */ - if (off + len < off) + if ((pgoff + (len >> PAGE_SHIFT)) < pgoff) return -EINVAL; - off = off >> PAGE_SHIFT; - /* Too many mappings? 
*/ if (mm->map_count > MAX_MAP_COUNT) return -ENOMEM; @@ -274,7 +269,7 @@ unsigned long do_mmap(struct file * file, unsigned long addr, unsigned long len, vma->vm_flags |= VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC; vma->vm_page_prot = protection_map[vma->vm_flags & 0x0f]; vma->vm_ops = NULL; - vma->vm_pgoff = off; + vma->vm_pgoff = pgoff; vma->vm_file = NULL; vma->vm_private_data = NULL; diff --git a/mm/numa.c b/mm/numa.c new file mode 100644 index 000000000000..dcbc37c35cd1 --- /dev/null +++ b/mm/numa.c @@ -0,0 +1,103 @@ +/* + * Written by Kanoj Sarcar, SGI, Aug 1999 + */ +#include +#include +#include +#include +#include +#include +#include +#include + +int numnodes = 1; /* Initialized for UMA platforms */ + +#ifndef CONFIG_DISCONTIGMEM + +static bootmem_data_t contig_bootmem_data; +pg_data_t contig_page_data = { bdata: &contig_bootmem_data }; + +#endif /* !CONFIG_DISCONTIGMEM */ + +struct page * alloc_pages_node(int nid, int gfp_mask, unsigned long order) +{ + return __alloc_pages(NODE_DATA(nid)->node_zonelists + gfp_mask, order); +} + +#ifdef CONFIG_DISCONTIGMEM + +#define LONG_ALIGN(x) (((x)+(sizeof(long))-1)&~((sizeof(long))-1)) + +static spinlock_t node_lock = SPIN_LOCK_UNLOCKED; + +extern void show_free_areas_core(int); +extern void __init free_area_init_core(int nid, pg_data_t *pgdat, + struct page **gmap, unsigned int *zones_size, unsigned long paddr); + +void show_free_areas_node(int nid) +{ + unsigned long flags; + + spin_lock_irqsave(&node_lock, flags); + printk("Memory information for node %d:\n", nid); + show_free_areas_core(nid); + spin_unlock_irqrestore(&node_lock, flags); +} + +/* + * Nodes can be initialized parallely, in no particular order. 
+ */ +void __init free_area_init_node(int nid, pg_data_t *pgdat, + unsigned int *zones_size, unsigned long zone_start_paddr) +{ + int i, size = 0; + struct page *discard; + + if (mem_map == (mem_map_t *)NULL) + mem_map = (mem_map_t *)PAGE_OFFSET; + + free_area_init_core(nid, pgdat, &discard, zones_size, zone_start_paddr); + + /* + * Get space for the valid bitmap. + */ + for (i = 0; i < MAX_NR_ZONES; i++) + size += zones_size[i]; + size = LONG_ALIGN((size + 7) >> 3); + pgdat->valid_addr_bitmap = (unsigned long *)alloc_bootmem_node(nid, size); + memset(pgdat->valid_addr_bitmap, 0, size); +} + +/* + * This can be refined. Currently, tries to do round robin, instead + * should do concentratic circle search, starting from current node. + */ +struct page * alloc_pages(int gfp_mask, unsigned long order) +{ + struct page *ret = 0; + unsigned long flags; + int startnode, tnode; + static int nextnid = 0; + + if (order >= MAX_ORDER) + return NULL; + spin_lock_irqsave(&node_lock, flags); + tnode = nextnid; + nextnid++; + if (nextnid == numnodes) + nextnid = 0; + spin_unlock_irqrestore(&node_lock, flags); + startnode = tnode; + while (tnode < numnodes) { + if ((ret = alloc_pages_node(tnode++, gfp_mask, order))) + return(ret); + } + tnode = 0; + while (tnode != startnode) { + if ((ret = alloc_pages_node(tnode++, gfp_mask, order))) + return(ret); + } + return(0); +} + +#endif /* CONFIG_DISCONTIGMEM */ diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 51db14c4377d..fdbb8b10a0f7 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -5,6 +5,7 @@ * Swap reorganised 29.12.95, Stephen Tweedie * Support of BIGMEM added by Gerhard Wichert, Siemens AG, July 1999 * Reshaped it to be a zoned allocator, Ingo Molnar, Red Hat, 1999 + * Discontiguous memory support, Kanoj Sarcar, SGI, Nov 1999 */ #include @@ -15,18 +16,18 @@ #include #include +/* Use NUMNODES instead of numnodes for better code inside kernel APIs */ +#ifndef CONFIG_DISCONTIGMEM +#define NUMNODES 1 +#else +#define NUMNODES 
numnodes +#endif + int nr_swap_pages = 0; int nr_lru_pages; LIST_HEAD(lru_cache); -static zone_t zones [MAX_NR_ZONES] = - { - { SPIN_LOCK_UNLOCKED, name: "DMA" }, - { SPIN_LOCK_UNLOCKED, name: "Normal" }, - { SPIN_LOCK_UNLOCKED, name: "HighMem" } - }; - -zonelist_t zonelists [NR_GFPINDEX]; +static char *zone_names[MAX_NR_ZONES] = { "DMA", "Normal", "HighMem" }; /* * Free_page() adds the page to the free lists. This is optimized for @@ -351,10 +352,12 @@ unsigned int nr_free_pages (void) { unsigned int sum; zone_t *zone; + int i; sum = 0; - for (zone = zones; zone < zones + MAX_NR_ZONES; zone++) - sum += zone->free_pages; + for (i = 0; i < NUMNODES; i++) + for (zone = NODE_DATA(i)->node_zones; zone < NODE_DATA(i)->node_zones + MAX_NR_ZONES; zone++) + sum += zone->free_pages; return sum; } @@ -365,17 +368,24 @@ unsigned int nr_free_buffer_pages (void) { unsigned int sum; zone_t *zone; + int i; sum = nr_lru_pages; - for (zone = zones; zone <= zones+ZONE_NORMAL; zone++) - sum += zone->free_pages; + for (i = 0; i < NUMNODES; i++) + for (zone = NODE_DATA(i)->node_zones; zone <= NODE_DATA(i)->node_zones+ZONE_NORMAL; zone++) + sum += zone->free_pages; return sum; } #if CONFIG_HIGHMEM unsigned int nr_free_highpages (void) { - return zones[ZONE_HIGHMEM].free_pages; + int i; + unsigned int pages = 0; + + for (i = 0; i < NUMNODES; i++) + pages += NODE_DATA(i)->node_zones[ZONE_HIGHMEM].free_pages; + return pages; } #endif @@ -384,7 +394,7 @@ unsigned int nr_free_highpages (void) * We also calculate the percentage fragmentation. We do this by counting the * memory on each free list with the exception of the first item on the list. 
*/ -void show_free_areas(void) +void show_free_areas_core(int nid) { unsigned long order; unsigned type; @@ -402,7 +412,7 @@ void show_free_areas(void) for (type = 0; type < MAX_NR_ZONES; type++) { struct list_head *head, *curr; - zone_t *zone = zones + type; + zone_t *zone = NODE_DATA(nid)->node_zones + type; unsigned long nr, total, flags; printk(" %s: ", zone->name); @@ -434,12 +444,15 @@ void show_free_areas(void) #endif } +void show_free_areas(void) +{ + show_free_areas_core(0); +} + /* - * Builds allocation fallback zone lists. We are basically ready - * to do NUMA-allocations, only this function has to be modified - * and the zonelists array be made per-CPU. + * Builds allocation fallback zone lists. */ -static inline void build_zonelists (void) +static inline void build_zonelists(pg_data_t *pgdat) { int i, j, k; @@ -447,7 +460,7 @@ static inline void build_zonelists (void) zonelist_t *zonelist; zone_t *zone; - zonelist = zonelists + i; + zonelist = pgdat->node_zonelists + i; memset(zonelist, 0, sizeof(*zonelist)); zonelist->gfp_mask = i; @@ -465,7 +478,7 @@ static inline void build_zonelists (void) * fallthrough: */ case ZONE_HIGHMEM: - zone = zones + ZONE_HIGHMEM; + zone = pgdat->node_zones + ZONE_HIGHMEM; if (zone->size) { #ifndef CONFIG_HIGHMEM BUG(); @@ -473,11 +486,13 @@ static inline void build_zonelists (void) zonelist->zones[j++] = zone; } case ZONE_NORMAL: - zone = zones + ZONE_NORMAL; + zone = pgdat->node_zones + ZONE_NORMAL; if (zone->size) zonelist->zones[j++] = zone; case ZONE_DMA: - zonelist->zones[j++] = zones + ZONE_DMA; + zone = pgdat->node_zones + ZONE_DMA; + if (zone->size) + zonelist->zones[j++] = zone; } zonelist->zones[j++] = NULL; } @@ -491,9 +506,10 @@ static inline void build_zonelists (void) * - mark all memory queues empty * - clear the memory bitmaps */ -void __init free_area_init(unsigned int *zones_size) +void __init free_area_init_core(int nid, pg_data_t *pgdat, struct page **gmap, + unsigned int *zones_size, unsigned long 
zone_start_paddr) { - struct page * p; + struct page *p, *lmem_map; unsigned long i, j; unsigned long map_size; unsigned int totalpages, offset; @@ -503,7 +519,7 @@ void __init free_area_init(unsigned int *zones_size) unsigned long size = zones_size[i]; totalpages += size; } - printk("totalpages: %08x\n", totalpages); + printk("On node %d totalpages: %08x\n", nid, totalpages); /* * Select nr of pages we try to keep free for important stuff @@ -524,16 +540,22 @@ void __init free_area_init(unsigned int *zones_size) /* * Some architectures (with lots of mem and discontinous memory * maps) have to search for a good mem_map area: + * For discontigmem, the conceptual mem map array starts from + * PAGE_OFFSET, we need to align the actual array onto a mem map + * boundary, so that MAP_NR works. */ - map_size = totalpages*sizeof(struct page); - mem_map = (struct page *) alloc_bootmem(map_size); + map_size = (totalpages + 1)*sizeof(struct page); + lmem_map = (struct page *) alloc_bootmem_node(nid, map_size); + lmem_map = (struct page *)(PAGE_OFFSET + + MAP_ALIGN((unsigned long)lmem_map - PAGE_OFFSET)); + *gmap = pgdat->node_mem_map = lmem_map; /* * Initially all pages are reserved - free ones are freed * up by free_all_bootmem() once the early boot process is * done. 
*/ - for (p = mem_map; p < mem_map + totalpages; p++) { + for (p = lmem_map; p < lmem_map + totalpages; p++) { set_page_count(p, 0); p->flags = (1 << PG_DMA); SetPageReserved(p); @@ -541,9 +563,9 @@ void __init free_area_init(unsigned int *zones_size) memlist_init(&p->list); } - offset = 0; + offset = lmem_map - mem_map; for (j = 0; j < MAX_NR_ZONES; j++) { - zone_t *zone = zones + j; + zone_t *zone = pgdat->node_zones + j; unsigned long mask = -1; unsigned long size; @@ -551,6 +573,8 @@ void __init free_area_init(unsigned int *zones_size) printk("zone(%ld): %ld pages.\n", j, size); zone->size = size; + zone->name = zone_names[j]; + zone->lock = SPIN_LOCK_UNLOCKED; if (!size) continue; @@ -567,8 +591,10 @@ void __init free_area_init(unsigned int *zones_size) for (i = 0; i < size; i++) { struct page *page = mem_map + offset + i; page->zone = zone; - if (j != ZONE_HIGHMEM) - page->virtual = __page_address(page); + if (j != ZONE_HIGHMEM) { + page->virtual = (unsigned long)(__va(zone_start_paddr)); + zone_start_paddr += PAGE_SIZE; + } } offset += size; @@ -582,8 +608,13 @@ void __init free_area_init(unsigned int *zones_size) bitmap_size = (bitmap_size + 7) >> 3; bitmap_size = LONG_ALIGN(bitmap_size); zone->free_area[i].map = - (unsigned int *) alloc_bootmem(bitmap_size); + (unsigned int *) alloc_bootmem_node(nid, bitmap_size); } } - build_zonelists(); + build_zonelists(pgdat); +} + +void __init free_area_init(unsigned int *zones_size) +{ + free_area_init_core(0, NODE_DATA(0), &mem_map, zones_size, 0); } diff --git a/mm/swap.c b/mm/swap.c index 1d6b0d4d041d..460707ff7035 100644 --- a/mm/swap.c +++ b/mm/swap.c @@ -30,13 +30,13 @@ * start background swapping if we fall below freepages.high free * pages, and we begin intensive swapping below freepages.low. * - * These values are there to keep GCC from complaining. Actual - * initialization is done in mm/page_alloc.c or arch/sparc(64)/mm/init.c. 
+ * Actual initialization is done in mm/page_alloc.c or + * arch/sparc(64)/mm/init.c. */ freepages_t freepages = { - 48, /* freepages.min */ - 96, /* freepages.low */ - 144 /* freepages.high */ + 0, /* freepages.min */ + 0, /* freepages.low */ + 0 /* freepages.high */ }; /* How many pages do we try to swap or page in/out together? */ diff --git a/mm/swap_state.c b/mm/swap_state.c index fdcad63f3285..44adf8bdd204 100644 --- a/mm/swap_state.c +++ b/mm/swap_state.c @@ -58,7 +58,7 @@ static inline void remove_from_swap_cache(struct page *page) if (mapping != &swapper_space) BUG(); - if (!PageSwapCache(page)) + if (!PageSwapCache(page) || !PageLocked(page)) PAGE_BUG(page); PageClearSwapCache(page); @@ -82,44 +82,46 @@ void __delete_from_swap_cache(struct page *page) swap_free(entry); } -static void delete_from_swap_cache_nolock(struct page *page) +/* + * This will never put the page into the free list, the caller has + * a reference on the page. + */ +void delete_from_swap_cache_nolock(struct page *page) { - if (block_flushpage(NULL, page, 0)) + if (block_flushpage(page, 0)) lru_cache_del(page); __delete_from_swap_cache(page); + page_cache_release(page); } /* * This must be called only on pages that have - * been verified to be in the swap cache. + * been verified to be in the swap cache and locked. */ void delete_from_swap_cache(struct page *page) { lock_page(page); - delete_from_swap_cache_nolock(page); - UnlockPage(page); - page_cache_release(page); } /* * Perform a free_page(), also freeing any swap cache associated with - * this page if it is the last user of the page. + * this page if it is the last user of the page. Can not do a lock_page, + * as we are holding the page_table_lock spinlock. */ - void free_page_and_swap_cache(struct page *page) { /* - * If we are the only user, then free up the swap cache. + * If we are the only user, then try to free up the swap cache. 
*/ - lock_page(page); - if (PageSwapCache(page) && !is_page_shared(page)) { - delete_from_swap_cache_nolock(page); - page_cache_release(page); + if (PageSwapCache(page) && !TryLockPage(page)) { + if (!is_page_shared(page)) { + delete_from_swap_cache_nolock(page); + } + UnlockPage(page); } - UnlockPage(page); clear_bit(PG_swap_entry, &page->flags); @@ -145,10 +147,24 @@ struct page * lookup_swap_cache(swp_entry_t entry) /* * Right now the pagecache is 32-bit only. But it's a 32 bit index. =) */ +repeat: found = find_lock_page(&swapper_space, entry.val); if (!found) return 0; - if (found->mapping != &swapper_space || !PageSwapCache(found)) + /* + * Though the "found" page was in the swap cache an instant + * earlier, it might have been removed by shrink_mmap etc. + * Re search ... Since find_lock_page grabs a reference on + * the page, it can not be reused for anything else, namely + * it can not be associated with another swaphandle, so it + * is enough to check whether the page is still in the scache. + */ + if (!PageSwapCache(found)) { + UnlockPage(found); + __free_page(found); + goto repeat; + } + if (found->mapping != &swapper_space) goto out_bad; #ifdef SWAP_CACHE_INFO swap_cache_find_success++; diff --git a/mm/vmscan.c b/mm/vmscan.c index cf8f5fa35c9d..9a2f60f2072d 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c @@ -158,15 +158,15 @@ drop_pte: if (!(page = prepare_highmem_swapout(page))) goto out_swap_free; - vma->vm_mm->rss--; - set_pte(page_table, swp_entry_to_pte(entry)); - vmlist_access_unlock(vma->vm_mm); - - flush_tlb_page(vma, address); swap_duplicate(entry); /* One for the process, one for the swap cache */ /* This will also lock the page */ add_to_swap_cache(page, entry); + /* Put the swap entry into the pte after the page is in swapcache */ + vma->vm_mm->rss--; + set_pte(page_table, swp_entry_to_pte(entry)); + flush_tlb_page(vma, address); + vmlist_access_unlock(vma->vm_mm); /* OK, do a physical asynchronous write to swap. 
*/ rw_swap_page(WRITE, page, 0); diff --git a/net/wanrouter/wanproc.c b/net/wanrouter/wanproc.c index dece3100dc26..f895fc58b846 100644 --- a/net/wanrouter/wanproc.c +++ b/net/wanrouter/wanproc.c @@ -116,10 +116,8 @@ static struct inode_operations router_inode = NULL, /* get_block */ NULL, /* readpage */ NULL, /* writepage */ - NULL, /* flushpage */ NULL, /* truncate */ router_proc_perms, /* permission */ - NULL, /* smap */ NULL /* revalidate */ }; @@ -159,10 +157,8 @@ static struct inode_operations wandev_inode = NULL, /* get_block */ NULL, /* readpage */ NULL, /* writepage */ - NULL, /* flushpage */ NULL, /* truncate */ router_proc_perms, /* permission */ - NULL, /* smap */ NULL /* revalidate */ }; -- 2.39.5