[PATCH] readahead fix

author Andrew Morton <akpm@zip.com.au>
Tue, 30 Apr 2002 06:51:50 +0000 (23:51 -0700)
committer Linus Torvalds <torvalds@home.transmeta.com>
Tue, 30 Apr 2002 06:51:50 +0000 (23:51 -0700)
diff --git a/drivers/block/blkpg.c b/drivers/block/blkpg.c

index 23c9896a9421c2b563d822f757eece89afcdc1ac..1dfca6fedb8e7e4ea6931b31f1bf6a2e6d7ebb6a 100644 (file)
--- a/drivers/block/blkpg.c
+++ b/drivers/block/blkpg.c
@@ -219,6 +219,7 @@ int blk_ioctl(struct block_device *bdev, unsigned int cmd, unsigned long arg)
         unsigned short usval;
         kdev_t dev = to_kdev_t(bdev->bd_dev);
         int holder;
+       unsigned long *ra_pages;
  
         intval = block_ioctl(bdev, cmd, arg);
         if (intval != -ENOTTY)
@@ -240,13 +241,21 @@ int blk_ioctl(struct block_device *bdev, unsigned int cmd, unsigned long arg)
                 case BLKFRASET:
                         if(!capable(CAP_SYS_ADMIN))
                                 return -EACCES;
-                       return blk_set_readahead(bdev, arg);
+                       ra_pages = blk_get_ra_pages(dev);
+                       if (ra_pages == NULL)
+                               return -ENOTTY;
+                       *ra_pages = (arg * 512) / PAGE_CACHE_SIZE;
+                       return 0;
  
                 case BLKRAGET:
                 case BLKFRAGET:
                         if (!arg)
                                 return -EINVAL;
-                       return put_user(blk_get_readahead(bdev), (long *)arg);
+                       ra_pages = blk_get_ra_pages(dev);
+                       if (ra_pages == NULL)
+                               return -ENOTTY;
+                       return put_user((*ra_pages * PAGE_CACHE_SIZE) / 512,
+                                               (long *)arg);
  
                 case BLKSECTGET:
                         if ((q = blk_get_queue(dev)) == NULL)
diff --git a/drivers/block/ll_rw_blk.c b/drivers/block/ll_rw_blk.c

index 9c5a084198b7727dc75564aa7a105efdfbe83fdb..3aab4502e1354e5abe9b2b59bef25026c89ae7af 100644 (file)
--- a/drivers/block/ll_rw_blk.c
+++ b/drivers/block/ll_rw_blk.c
@@ -109,46 +109,21 @@ inline request_queue_t *blk_get_queue(kdev_t dev)
  }
  
  /**
- * blk_set_readahead - set a queue's readahead tunable
+ * blk_get_ra_pages - get the address of a queue's readahead tunable
   * @dev:       device
- * @sectors:   readahead, in 512 byte sectors
   *
- * Returns zero on success, else negative errno
- */
-int blk_set_readahead(struct block_device *bdev, unsigned sectors)
-{
-       int ret = -EINVAL;
-       request_queue_t *q = blk_get_queue(to_kdev_t(bdev->bd_dev));
-
-       if (q) {
-               q->ra_sectors = sectors;
-               blk_put_queue(q);
-               ret = 0;
-       }
-       return ret;
-}
-
-/**
- * blk_get_readahead - query a queue's readahead tunable
- * @dev:       device
- *
- * Locates the passed device's request queue and returns its
+ * Locates the passed device's request queue and returns the address of its
   * readahead setting.
   *
- * The returned value is in units of 512 byte sectors.
- *
- * Will return zero if the queue has never had its readahead
- * setting altered.
+ * Will return NULL if the request queue cannot be located.
   */
-unsigned blk_get_readahead(struct block_device *bdev)
+unsigned long *blk_get_ra_pages(kdev_t dev)
  {
-       unsigned ret = 0;
-       request_queue_t *q = blk_get_queue(to_kdev_t(bdev->bd_dev));
+       unsigned long *ret = NULL;
+       request_queue_t *q = blk_get_queue(dev);
  
-       if (q) {
-               ret = q->ra_sectors;
-               blk_put_queue(q);
-       }
+       if (q)
+               ret = &q->ra_pages;
         return ret;
  }
  
@@ -187,7 +162,7 @@ void blk_queue_make_request(request_queue_t * q, make_request_fn * mfn)
         q->max_phys_segments = MAX_PHYS_SEGMENTS;
         q->max_hw_segments = MAX_HW_SEGMENTS;
         q->make_request_fn = mfn;
-       q->ra_sectors = VM_MAX_READAHEAD << (10 - 9);   /* kbytes->sectors */
+       q->ra_pages = (VM_MAX_READAHEAD * 1024) / PAGE_CACHE_SIZE;
         blk_queue_max_sectors(q, MAX_SECTORS);
         blk_queue_hardsect_size(q, 512);
  
diff --git a/drivers/md/md.c b/drivers/md/md.c

index d40ff34d4b0e2518b7316c755483ff01d3489e9f..1580ecd9a9d2a2d8643d82343343259b297f276e 100644 (file)
--- a/drivers/md/md.c
+++ b/drivers/md/md.c
@@ -1521,6 +1521,7 @@ static int device_size_calculation(mddev_t * mddev)
         mdp_super_t *sb = mddev->sb;
         struct list_head *tmp;
         mdk_rdev_t *rdev;
+       unsigned long *ra_pages;
  
         /*
          * Do device size calculation. Bail out if too small.
@@ -1577,7 +1578,10 @@ static int device_size_calculation(mddev_t * mddev)
         if (!md_size[mdidx(mddev)])
                 md_size[mdidx(mddev)] = sb->size * data_disks;
  
-       readahead = (blk_get_readahead(rdev->bdev) * 512) / PAGE_SIZE;
+       readahead = (VM_MAX_READAHEAD * 1024) / PAGE_SIZE;
+       ra_pages = blk_get_ra_pages(rdev->dev);
+       if (ra_pages)
+               readahead = (*ra_pages * PAGE_CACHE_SIZE) / PAGE_SIZE;
         if (!sb->level || (sb->level == 4) || (sb->level == 5)) {
                 readahead = (mddev->sb->chunk_size>>PAGE_SHIFT) * 4 * data_disks;
                 if (readahead < data_disks * (MAX_SECTORS>>(PAGE_SHIFT-9))*2)
diff --git a/fs/block_dev.c b/fs/block_dev.c

index 46554de6d9e545fcf991fd22e4b7bdeac0e35065..8fb546dad90f424e8aa929a438a967580365a5c3 100644 (file)
--- a/fs/block_dev.c
+++ b/fs/block_dev.c
@@ -349,6 +349,8 @@ struct block_device *bdget(dev_t dev)
                 struct inode *inode = new_inode(bd_mnt->mnt_sb);
                 if (inode) {
                         kdev_t kdev = to_kdev_t(dev);
+                       unsigned long *ra_pages;
+
                         atomic_set(&new_bdev->bd_count,1);
                         new_bdev->bd_dev = dev;
                         new_bdev->bd_op = NULL;
@@ -360,6 +362,10 @@ struct block_device *bdget(dev_t dev)
                         inode->i_bdev = new_bdev;
                         inode->i_data.a_ops = &def_blk_aops;
                         inode->i_data.gfp_mask = GFP_USER;
+                       ra_pages = blk_get_ra_pages(kdev);
+                       if (ra_pages == NULL)
+                               ra_pages = &default_ra_pages;
+                       inode->i_data.ra_pages = ra_pages;
                         spin_lock(&bdev_lock);
                         bdev = bdfind(dev, head);
                         if (!bdev) {
diff --git a/fs/inode.c b/fs/inode.c

index c6d3a3b864c05f957af9870cabb458c95ddc88c9..91d7a9da223f601468cc011e580d6040fe2e1715 100644 (file)
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -109,6 +109,9 @@ static struct inode *alloc_inode(struct super_block *sb)
                 inode->i_data.host = inode;
                 inode->i_data.gfp_mask = GFP_HIGHUSER;
                 inode->i_mapping = &inode->i_data;
+               inode->i_data.ra_pages = &default_ra_pages;
+               if (sb->s_bdev)
+                       inode->i_data.ra_pages = sb->s_bdev->bd_inode->i_mapping->ra_pages;
                 memset(&inode->u, 0, sizeof(inode->u));
         }
         return inode;
diff --git a/fs/open.c b/fs/open.c

index 304b575919d5bb0e48415d4d1ed08c3dc1b8924c..f342cd5e6a28978029a4d72a97314777c3cf0948 100644 (file)
--- a/fs/open.c
+++ b/fs/open.c
@@ -632,6 +632,7 @@ struct file *dentry_open(struct dentry *dentry, struct vfsmount *mnt, int flags)
                         goto cleanup_file;
         }
  
+       f->f_ra.ra_pages = *inode->i_mapping->ra_pages;
         f->f_dentry = dentry;
         f->f_vfsmnt = mnt;
         f->f_pos = 0;
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h

index 5cb20016dd99f4c9efa852596cf9be33b93d0f23..01497e05a70a8f8e391b62f5a40aa969dadf9e87 100644 (file)
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -150,9 +150,9 @@ struct request_queue
  
         /*
          * The VM-level readahead tunable for this device.  In
-        * units of 512-byte sectors.
+        * units of PAGE_CACHE_SIZE pages.
          */
-       unsigned ra_sectors;
+       unsigned long ra_pages;
  
         /*
          * The queue owner gets to use this for whatever they like.
@@ -310,8 +310,7 @@ extern void blk_queue_hardsect_size(request_queue_t *q, unsigned short);
  extern void blk_queue_segment_boundary(request_queue_t *q, unsigned long);
  extern void blk_queue_assign_lock(request_queue_t *q, spinlock_t *);
  extern void blk_queue_prep_rq(request_queue_t *q, prep_rq_fn *pfn);
-extern int blk_set_readahead(struct block_device *bdev, unsigned sectors);
-extern unsigned blk_get_readahead(struct block_device *bdev);
+extern unsigned long *blk_get_ra_pages(kdev_t kdev);
  
  extern int blk_rq_map_sg(request_queue_t *, struct request *, struct scatterlist *);
  extern void blk_dump_rq_flags(struct request *, char *);
diff --git a/include/linux/fs.h b/include/linux/fs.h

index 31e2552276835cef219f248a7cd05a3b99c4b4d2..f0d997aeecb4c800af31abdf399ac2942ff7a242 100644 (file)
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -398,6 +398,7 @@ struct address_space {
         list_t                  i_mmap_shared;  /* list of private mappings */
         spinlock_t              i_shared_lock;  /* and spinlock protecting it */
         int                     gfp_mask;       /* how to allocate the pages */
+       unsigned long           *ra_pages;      /* device readahead */
  };
  
  struct char_device {
@@ -513,6 +514,7 @@ struct file_ra_state {
         unsigned long prev_page;        /* Cache last read() position */
         unsigned long ahead_start;      /* Ahead window */
         unsigned long ahead_size;
+       unsigned long ra_pages;         /* Maximum readahead window */
  };
  
  struct file {
diff --git a/include/linux/mm.h b/include/linux/mm.h

index 56b5f325e0dfdde28a2ac77cc153e7b4e292181e..5f1c731ddde19219ebdd47f88a745ce59ddca25f 100644 (file)
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -504,6 +504,7 @@ void do_page_cache_readahead(struct file *file,
  void page_cache_readahead(struct file *file, unsigned long offset);
  void page_cache_readaround(struct file *file, unsigned long offset);
  void handle_ra_thrashing(struct file *file);
+extern unsigned long default_ra_pages;
  
  /* vma is the first one with  address < vma->vm_end,
   * and even  address < vma->vm_start. Have to extend vma. */
diff --git a/mm/readahead.c b/mm/readahead.c

index cf7efc8d5ed5d3e6bd0673944e8745d4ea59b49a..f38fdb1a7acfba1c295c707a30aba8c2831137d5 100644 (file)
--- a/mm/readahead.c
+++ b/mm/readahead.c
@@ -12,39 +12,19 @@
  #include <linux/mm.h>
  #include <linux/blkdev.h>
  
-/*
- * The readahead logic manages two readahead windows.  The "current"
- * and the "ahead" windows.
- *
- * VM_MAX_READAHEAD specifies, in kilobytes, the maximum size of
- * each of the two windows.  So the amount of readahead which is
- * in front of the file pointer varies between VM_MAX_READAHEAD and
- * VM_MAX_READAHEAD * 2.
- *
- * VM_MAX_READAHEAD only applies if the underlying request queue
- * has a zero value of ra_sectors.
- */
+unsigned long default_ra_pages = (VM_MAX_READAHEAD * 1024) / PAGE_CACHE_SIZE;
  
  /*
   * Return max readahead size for this inode in number-of-pages.
   */
-static int get_max_readahead(struct inode *inode)
+static inline unsigned long get_max_readahead(struct file *file)
  {
-       unsigned blk_ra_kbytes = 0;
-
-       if (inode->i_sb->s_bdev) {
-               blk_ra_kbytes = blk_get_readahead(inode->i_sb->s_bdev) / 2;
-       }
-       return blk_ra_kbytes >> (PAGE_CACHE_SHIFT - 10);
+       return file->f_ra.ra_pages;
  }
  
-static int get_min_readahead(struct inode *inode)
+static inline unsigned long get_min_readahead(struct file *file)
  {
-       int ret = VM_MIN_READAHEAD / PAGE_CACHE_SIZE;
-
-       if (ret < 2)
-               ret = 2;
-       return ret;
+       return (VM_MIN_READAHEAD * 1024) / PAGE_CACHE_SIZE;
  }
  
  /*
@@ -189,7 +169,6 @@ void do_page_cache_readahead(struct file *file,
   */
  void page_cache_readahead(struct file *file, unsigned long offset)
  {
-       struct inode *inode = file->f_dentry->d_inode->i_mapping->host;
         struct file_ra_state *ra = &file->f_ra;
         unsigned long max;
         unsigned long min;
@@ -206,10 +185,10 @@ void page_cache_readahead(struct file *file, unsigned long offset)
                         goto out;
         }
  
-       max = get_max_readahead(inode);
+       max = get_max_readahead(file);
         if (max == 0)
                 goto out;       /* No readahead */
-       min = get_min_readahead(inode);
+       min = get_min_readahead(file);
  
         if (ra->next_size == 0 && offset == 0) {
                 /*
@@ -309,9 +288,9 @@ out:
   */
  void page_cache_readaround(struct file *file, unsigned long offset)
  {
+       const unsigned long min = get_min_readahead(file) * 2;
         unsigned long target;
         unsigned long backward;
-       const int min = get_min_readahead(file->f_dentry->d_inode->i_mapping->host) * 2;
  
         if (file->f_ra.next_size < min)
                 file->f_ra.next_size = min;
@@ -338,8 +317,7 @@ void page_cache_readaround(struct file *file, unsigned long offset)
   */
  void handle_ra_thrashing(struct file *file)
  {
-       struct address_space * mapping = file->f_dentry->d_inode->i_mapping;
-       const unsigned long min = get_min_readahead(mapping->host);
+       const unsigned long min = get_min_readahead(file);
  
         file->f_ra.next_size -= 3;
         if (file->f_ra.next_size < min)
author	Andrew Morton <akpm@zip.com.au>
	Tue, 30 Apr 2002 06:51:50 +0000 (23:51 -0700)
committer	Linus Torvalds <torvalds@home.transmeta.com>
	Tue, 30 Apr 2002 06:51:50 +0000 (23:51 -0700)
drivers/block/blkpg.c		patch \| blob \| history
drivers/block/ll_rw_blk.c		patch \| blob \| history
drivers/md/md.c		patch \| blob \| history
fs/block_dev.c		patch \| blob \| history
fs/inode.c		patch \| blob \| history
fs/open.c		patch \| blob \| history
include/linux/blkdev.h		patch \| blob \| history
include/linux/fs.h		patch \| blob \| history
include/linux/mm.h		patch \| blob \| history
mm/readahead.c		patch \| blob \| history