unsigned short usval;
kdev_t dev = to_kdev_t(bdev->bd_dev);
int holder;
+ unsigned long *ra_pages;
intval = block_ioctl(bdev, cmd, arg);
if (intval != -ENOTTY)
case BLKFRASET:
if(!capable(CAP_SYS_ADMIN))
return -EACCES;
- return blk_set_readahead(bdev, arg);
+ ra_pages = blk_get_ra_pages(dev);
+ if (ra_pages == NULL)
+ return -ENOTTY;
+ *ra_pages = (arg * 512) / PAGE_CACHE_SIZE;
+ return 0;
case BLKRAGET:
case BLKFRAGET:
if (!arg)
return -EINVAL;
- return put_user(blk_get_readahead(bdev), (long *)arg);
+ ra_pages = blk_get_ra_pages(dev);
+ if (ra_pages == NULL)
+ return -ENOTTY;
+ return put_user((*ra_pages * PAGE_CACHE_SIZE) / 512,
+ (long *)arg);
case BLKSECTGET:
if ((q = blk_get_queue(dev)) == NULL)
}
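
The user-visible units are unchanged: BLKRASET/BLKFRASET still take 512-byte
sectors, and BLKRAGET/BLKFRAGET still report them. Only the stored value is
now whole pages, so settings are truncated to a page multiple. A minimal
sketch of the resulting round-trip (helper names are illustrative, not part
of the patch):

	/*
	 * With 4096-byte pages one page is eight sectors: a BLKRASET of
	 * 260 sectors is stored as 32 pages and read back as 256 sectors.
	 */
	static unsigned long ra_sectors_to_pages(unsigned long sectors)
	{
		return (sectors * 512) / PAGE_CACHE_SIZE;	/* rounds down */
	}

	static unsigned long ra_pages_to_sectors(unsigned long pages)
	{
		return (pages * PAGE_CACHE_SIZE) / 512;
	}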
/**
- * blk_set_readahead - set a queue's readahead tunable
+ * blk_get_ra_pages - get the address of a queue's readahead tunable
* @dev: device
- * @sectors: readahead, in 512 byte sectors
*
- * Returns zero on success, else negative errno
- */
-int blk_set_readahead(struct block_device *bdev, unsigned sectors)
-{
- int ret = -EINVAL;
- request_queue_t *q = blk_get_queue(to_kdev_t(bdev->bd_dev));
-
- if (q) {
- q->ra_sectors = sectors;
- blk_put_queue(q);
- ret = 0;
- }
- return ret;
-}
-
-/**
- * blk_get_readahead - query a queue's readahead tunable
- * @dev: device
- *
- * Locates the passed device's request queue and returns its
+ * Locates the passed device's request queue and returns the address of its
* readahead setting.
*
- * The returned value is in units of 512 byte sectors.
- *
- * Will return zero if the queue has never had its readahead
- * setting altered.
+ * The setting is in units of PAGE_CACHE_SIZE pages.
+ *
+ * Will return NULL if the request queue cannot be located.
*/
-unsigned blk_get_readahead(struct block_device *bdev)
+unsigned long *blk_get_ra_pages(kdev_t dev)
{
- unsigned ret = 0;
- request_queue_t *q = blk_get_queue(to_kdev_t(bdev->bd_dev));
+ unsigned long *ret = NULL;
+ request_queue_t *q = blk_get_queue(dev);
- if (q) {
- ret = q->ra_sectors;
- blk_put_queue(q);
- }
+ if (q)
+ ret = &q->ra_pages;
return ret;
}
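
Because the helper hands back the address of q->ra_pages rather than its
value, a write through the result is immediately visible to every
address_space wired to this queue. A hedged sketch of the calling
convention (not code from the patch; new_value is a stand-in):

	unsigned long *ra_pages;

	ra_pages = blk_get_ra_pages(dev);
	if (ra_pages == NULL)
		return -ENOTTY;		/* no queue registered for dev */
	*ra_pages = new_value;		/* retunes every user of the queue */

Callers must tolerate the NULL return and should not cache the pointer
beyond the lifetime of the queue it aliases.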
q->max_phys_segments = MAX_PHYS_SEGMENTS;
q->max_hw_segments = MAX_HW_SEGMENTS;
q->make_request_fn = mfn;
- q->ra_sectors = VM_MAX_READAHEAD << (10 - 9); /* kbytes->sectors */
+ q->ra_pages = (VM_MAX_READAHEAD * 1024) / PAGE_CACHE_SIZE;
blk_queue_max_sectors(q, MAX_SECTORS);
blk_queue_hardsect_size(q, 512);
mdp_super_t *sb = mddev->sb;
struct list_head *tmp;
mdk_rdev_t *rdev;
+ unsigned long *ra_pages;
/*
* Do device size calculation. Bail out if too small.
if (!md_size[mdidx(mddev)])
md_size[mdidx(mddev)] = sb->size * data_disks;
- readahead = (blk_get_readahead(rdev->bdev) * 512) / PAGE_SIZE;
+ readahead = (VM_MAX_READAHEAD * 1024) / PAGE_SIZE;
+ ra_pages = blk_get_ra_pages(rdev->dev);
+ if (ra_pages)
+ readahead = (*ra_pages * PAGE_CACHE_SIZE) / PAGE_SIZE; /* page-cache pages -> PAGE_SIZE pages */
if (!sb->level || (sb->level == 4) || (sb->level == 5)) {
readahead = (mddev->sb->chunk_size>>PAGE_SHIFT) * 4 * data_disks;
if (readahead < data_disks * (MAX_SECTORS>>(PAGE_SHIFT-9))*2)
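
For scale (illustrative numbers; assuming VM_MAX_READAHEAD is 128 kbytes
and 4096-byte pages): the per-device fallback works out to
(128 * 1024) / 4096 = 32 pages, while the striped-array path above gives a
RAID5 array with 64kB chunks and four data disks
(65536 >> 12) * 4 * 4 = 256 pages, i.e. 1MB of readahead spread across the
stripe.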
struct inode *inode = new_inode(bd_mnt->mnt_sb);
if (inode) {
kdev_t kdev = to_kdev_t(dev);
+ unsigned long *ra_pages;
+
atomic_set(&new_bdev->bd_count,1);
new_bdev->bd_dev = dev;
new_bdev->bd_op = NULL;
inode->i_bdev = new_bdev;
inode->i_data.a_ops = &def_blk_aops;
inode->i_data.gfp_mask = GFP_USER;
+ ra_pages = blk_get_ra_pages(kdev);
+ if (ra_pages == NULL)
+ ra_pages = &default_ra_pages;
+ inode->i_data.ra_pages = ra_pages;
spin_lock(&bdev_lock);
bdev = bdfind(dev, head);
if (!bdev) {
inode->i_data.host = inode;
inode->i_data.gfp_mask = GFP_HIGHUSER;
inode->i_mapping = &inode->i_data;
+ inode->i_data.ra_pages = &default_ra_pages;
+ if (sb->s_bdev)
+ inode->i_data.ra_pages = sb->s_bdev->bd_inode->i_mapping->ra_pages;
memset(&inode->u, 0, sizeof(inode->u));
}
return inode;
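
Together with the bdget() hunk above, this completes the pointer plumbing.
A sketch of the sharing that results (illustrative; assumes the device has
a request queue):

	/*
	 *   q->ra_pages                            one tunable per queue
	 *        ^
	 *        |  blk_get_ra_pages(), in bdget()
	 *   block-device inode: i_data.ra_pages
	 *        ^
	 *        |  pointer copied here
	 *   filesystem inodes:  i_data.ra_pages
	 *
	 * Devices with no locatable queue fall back to &default_ra_pages,
	 * so the pointer is always safe to dereference.
	 */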
goto cleanup_file;
}
+ /* per-open snapshot: retuning the device later will not affect this file */
+ f->f_ra.ra_pages = *inode->i_mapping->ra_pages;
f->f_dentry = dentry;
f->f_vfsmnt = mnt;
f->f_pos = 0;
/*
* The VM-level readahead tunable for this device. In
- * units of 512-byte sectors.
+ * units of PAGE_CACHE_SIZE pages.
*/
- unsigned ra_sectors;
+ unsigned long ra_pages;
/*
* The queue owner gets to use this for whatever they like.
extern void blk_queue_segment_boundary(request_queue_t *q, unsigned long);
extern void blk_queue_assign_lock(request_queue_t *q, spinlock_t *);
extern void blk_queue_prep_rq(request_queue_t *q, prep_rq_fn *pfn);
-extern int blk_set_readahead(struct block_device *bdev, unsigned sectors);
-extern unsigned blk_get_readahead(struct block_device *bdev);
+extern unsigned long *blk_get_ra_pages(kdev_t kdev);
extern int blk_rq_map_sg(request_queue_t *, struct request *, struct scatterlist *);
extern void blk_dump_rq_flags(struct request *, char *);
list_t i_mmap_shared; /* list of private mappings */
spinlock_t i_shared_lock; /* and spinlock protecting it */
int gfp_mask; /* how to allocate the pages */
+ unsigned long *ra_pages; /* device readahead (points into the queue, or at default_ra_pages) */
};
struct char_device {
unsigned long prev_page; /* Cache last read() position */
unsigned long ahead_start; /* Ahead window */
unsigned long ahead_size;
+ unsigned long ra_pages; /* Maximum readahead window, in PAGE_CACHE_SIZE pages */
};
struct file {
void page_cache_readahead(struct file *file, unsigned long offset);
void page_cache_readaround(struct file *file, unsigned long offset);
void handle_ra_thrashing(struct file *file);
+extern unsigned long default_ra_pages;
/* vma is the first one with address < vma->vm_end,
* and even address < vma->vm_start. Have to extend vma. */
#include <linux/mm.h>
#include <linux/blkdev.h>
-/*
- * The readahead logic manages two readahead windows. The "current"
- * and the "ahead" windows.
- *
- * VM_MAX_READAHEAD specifies, in kilobytes, the maximum size of
- * each of the two windows. So the amount of readahead which is
- * in front of the file pointer varies between VM_MAX_READAHEAD and
- * VM_MAX_READAHEAD * 2.
- *
- * VM_MAX_READAHEAD only applies if the underlying request queue
- * has a zero value of ra_sectors.
- */
+unsigned long default_ra_pages = (VM_MAX_READAHEAD * 1024) / PAGE_CACHE_SIZE;
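
Since VM_MAX_READAHEAD is expressed in kbytes, the definition is
independent of the page size. Assuming the usual value of 128:

	default_ra_pages = (128 * 1024) / 4096  = 32 pages with 4kB pages
	                 = (128 * 1024) / 65536 =  2 pages with 64kB pages

i.e. 128kB of readahead either way.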
/*
- * Return max readahead size for this inode in number-of-pages.
+ * Return max readahead size for this file in number-of-pages.
*/
-static int get_max_readahead(struct inode *inode)
+static inline unsigned long get_max_readahead(struct file *file)
{
- unsigned blk_ra_kbytes = 0;
-
- if (inode->i_sb->s_bdev) {
- blk_ra_kbytes = blk_get_readahead(inode->i_sb->s_bdev) / 2;
- }
- return blk_ra_kbytes >> (PAGE_CACHE_SHIFT - 10);
+ return file->f_ra.ra_pages;
}
-static int get_min_readahead(struct inode *inode)
+static inline unsigned long get_min_readahead(struct file *file)
{
- int ret = VM_MIN_READAHEAD / PAGE_CACHE_SIZE;
-
- if (ret < 2)
- ret = 2;
- return ret;
+ return (VM_MIN_READAHEAD * 1024) / PAGE_CACHE_SIZE;
}
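
Assuming VM_MIN_READAHEAD is 16 kbytes, this returns
(16 * 1024) / 4096 = 4 pages with 4kB pages. Note there is no longer a
floor of two pages, as the removed code had: with a PAGE_CACHE_SIZE of
32kB or more the integer division reaches zero.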
/*
*/
void page_cache_readahead(struct file *file, unsigned long offset)
{
- struct inode *inode = file->f_dentry->d_inode->i_mapping->host;
struct file_ra_state *ra = &file->f_ra;
unsigned long max;
unsigned long min;
goto out;
}
- max = get_max_readahead(inode);
+ max = get_max_readahead(file);
if (max == 0)
goto out; /* No readahead */
- min = get_min_readahead(inode);
+ min = get_min_readahead(file);
if (ra->next_size == 0 && offset == 0) {
/*
*/
void page_cache_readaround(struct file *file, unsigned long offset)
{
+ const unsigned long min = get_min_readahead(file) * 2;
unsigned long target;
unsigned long backward;
- const int min = get_min_readahead(file->f_dentry->d_inode->i_mapping->host) * 2;
if (file->f_ra.next_size < min)
file->f_ra.next_size = min;
*/
void handle_ra_thrashing(struct file *file)
{
- struct address_space * mapping = file->f_dentry->d_inode->i_mapping;
- const unsigned long min = get_min_readahead(mapping->host);
+ const unsigned long min = get_min_readahead(file);
file->f_ra.next_size -= 3;
if (file->f_ra.next_size < min)