git.neil.brown.name Git - history.git/commitdiff
[XFS] Fix a use after free in the unwritten extent code. Also rework the
author Stephen Lord <lord@sgi.com>
Fri, 2 May 2003 23:54:06 +0000 (01:54 +0200)
committer Stephen Lord <lord@sgi.com>
Fri, 2 May 2003 23:54:06 +0000 (01:54 +0200)
interface to the allocator to have its own flag set, and always
go through the same interface in all cases rather than having
unwritten extent requests take a different path from all others.

SGI Modid: 2.5.x-xfs:slinx:146678a

fs/xfs/linux/xfs_aops.c
fs/xfs/linux/xfs_iomap.c
fs/xfs/pagebuf/page_buf.c
fs/xfs/pagebuf/page_buf.h
fs/xfs/xfsidbg.c

index 7e9ecd1cbd2e244e38a1f2edb32d3adabb3e10ec..3d15789b4abd171d1b9f6d2582893f5c949a8597 100644 (file)
@@ -50,8 +50,6 @@ linvfs_unwritten_done(
                pagebuf_ioerror(pb, -EIO);
        if (atomic_dec_and_test(&pb->pb_io_remaining) == 1) {
                pagebuf_iodone(pb, 1, 1);
-               pb->pb_flags &= ~_PBF_LOCKABLE;
-               pagebuf_rele(pb);
        }
        end_buffer_async_write(bh, uptodate);
 }
@@ -61,28 +59,21 @@ linvfs_unwritten_done(
  * to written extents.
  */
 STATIC void
-xfs_unwritten_conv(
-       xfs_buf_t               *bp)
+linvfs_unwritten_conv(
+       xfs_buf_t       *bp)
 {
-       bhv_desc_t              *bdp = XFS_BUF_FSPRIVATE(bp, bhv_desc_t *);
-       xfs_mount_t             *mp;
-       xfs_inode_t             *ip;
+       vnode_t         *vp = XFS_BUF_FSPRIVATE(bp, vnode_t *);
+       int             error;
 
-       ip = XFS_BHVTOI(bdp);
-       mp = ip->i_mount;
-
-       if (XFS_TEST_ERROR(XFS_BUF_GETERROR(bp), mp,
-                          XFS_ERRTAG_STRATCMPL_IOERR,
-                          XFS_RANDOM_STRATCMPL_IOERR)) {
-               xfs_ioerror_alert(__FUNCTION__, mp, bp, XFS_BUF_ADDR(bp));
-       }
+       if (atomic_read(&bp->pb_hold) < 1) 
+               BUG();
 
-       XFS_IOMAP_WRITE_UNWRITTEN(mp, &ip->i_iocore,
-                                 XFS_BUF_OFFSET(bp), XFS_BUF_SIZE(bp));
+       VOP_BMAP(vp, XFS_BUF_OFFSET(bp), XFS_BUF_SIZE(bp),
+                       BMAP_UNWRITTEN, NULL, NULL, error);
        XFS_BUF_SET_FSPRIVATE(bp, NULL);
        XFS_BUF_CLR_IODONE_FUNC(bp);
        XFS_BUF_UNDATAIO(bp);
-       xfs_biodone(bp);
+       pagebuf_iodone(bp, 0, 0);
 }
 
 STATIC int
@@ -96,20 +87,20 @@ map_blocks(
        vnode_t                 *vp = LINVFS_GET_VP(inode);
        int                     error, nmaps = 1;
 
-       if (((flags & (PBF_DIRECT|PBF_SYNC)) == PBF_DIRECT) &&
+       if (((flags & (BMAP_DIRECT|BMAP_SYNC)) == BMAP_DIRECT) &&
            (offset >= inode->i_size))
                count = max_t(ssize_t, count, XFS_WRITE_IO_LOG);
 retry:
        VOP_BMAP(vp, offset, count, flags, pbmapp, &nmaps, error);
-       if (error == EAGAIN)
+       if ((error == EAGAIN) || (error == EIO))
                return -error;
-       if (unlikely((flags & (PBF_WRITE|PBF_DIRECT)) ==
-                                       (PBF_WRITE|PBF_DIRECT) && nmaps &&
+       if (unlikely((flags & (BMAP_WRITE|BMAP_DIRECT)) ==
+                                       (BMAP_WRITE|BMAP_DIRECT) && nmaps &&
                                        (pbmapp->pbm_flags & PBMF_DELAY))) {
-               flags = PBF_FILE_ALLOCATE;
+               flags = BMAP_ALLOCATE;
                goto retry;
        }
-       if (flags & (PBF_WRITE|PBF_FILE_ALLOCATE)) {
+       if (flags & (BMAP_WRITE|BMAP_ALLOCATE)) {
                VMODIFY(vp);
        }
        return -error;
@@ -371,7 +362,7 @@ map_unwritten(
        offset += p_offset;
 
        pb = pagebuf_lookup(mp->pbm_target,
-                           mp->pbm_offset, mp->pbm_bsize, _PBF_LOCKABLE);
+                           mp->pbm_offset, mp->pbm_bsize, 0);
        if (!pb)
                return -ENOMEM;
 
@@ -390,7 +381,6 @@ map_unwritten(
                tmp = match_offset_to_mapping(start_page, mp, p_offset);
                if (!tmp)
                        break;
-               BUG_ON(!(tmp->pbm_flags & PBMF_UNWRITTEN));
                map_buffer_at_offset(start_page, bh, p_offset, block_bits, mp);
                set_buffer_unwritten_io(bh);
                bh->b_private = pb;
@@ -442,15 +432,14 @@ map_unwritten(
        size <<= block_bits;    /* convert fsb's to byte range */
 
        XFS_BUF_DATAIO(pb);
+       XFS_BUF_ASYNC(pb);
        XFS_BUF_SET_SIZE(pb, size);
        XFS_BUF_SET_OFFSET(pb, offset);
-       XFS_BUF_SET_FSPRIVATE(pb, LINVFS_GET_VP(inode)->v_fbhv);
-       XFS_BUF_SET_IODONE_FUNC(pb, xfs_unwritten_conv);
+       XFS_BUF_SET_FSPRIVATE(pb, LINVFS_GET_VP(inode));
+       XFS_BUF_SET_IODONE_FUNC(pb, linvfs_unwritten_conv);
 
        if (atomic_dec_and_test(&pb->pb_io_remaining) == 1) {
                pagebuf_iodone(pb, 1, 1);
-               pb->pb_flags &= ~_PBF_LOCKABLE;
-               pagebuf_rele(pb);
        }
 
        return 0;
@@ -552,6 +541,7 @@ convert_page(
                } else {
                        set_buffer_dirty(bh);
                        unlock_buffer(bh);
+                       mark_buffer_dirty(bh);
                }
        } while (i++, (bh = bh->b_this_page) != head);
 
@@ -617,7 +607,7 @@ page_state_convert(
        unsigned long           p_offset = 0, end_index;
        loff_t                  offset, end_offset;
        int                     len, err, i, cnt = 0, uptodate = 1;
-       int                     flags = startio ? 0 : PBF_TRYLOCK;
+       int                     flags = startio ? 0 : BMAP_TRYLOCK;
        int                     page_dirty = 1;
 
 
@@ -655,7 +645,7 @@ page_state_convert(
                if (buffer_unwritten(bh)) {
                        if (!mp) {
                                err = map_blocks(inode, offset, len, &map,
-                                               PBF_FILE_UNWRITTEN);
+                                               BMAP_READ|BMAP_IGNSTATE);
                                if (err) {
                                        goto error;
                                }
@@ -677,6 +667,7 @@ page_state_convert(
                                } else {
                                        set_buffer_dirty(bh);
                                        unlock_buffer(bh);
+                                       mark_buffer_dirty(bh);
                                }
                                page_dirty = 0;
                        }
@@ -687,7 +678,7 @@ page_state_convert(
                } else if (buffer_delay(bh)) {
                        if (!mp) {
                                err = map_blocks(inode, offset, len, &map,
-                                       PBF_FILE_ALLOCATE | flags);
+                                       BMAP_ALLOCATE | flags);
                                if (err) {
                                        goto error;
                                }
@@ -702,6 +693,7 @@ page_state_convert(
                                } else {
                                        set_buffer_dirty(bh);
                                        unlock_buffer(bh);
+                                       mark_buffer_dirty(bh);
                                }
                                page_dirty = 0;
                        }
@@ -720,8 +712,8 @@ page_state_convert(
                                        size = probe_unmapped_cluster(
                                                        inode, page, bh, head);
                                        err = map_blocks(inode, offset,
-                                                       size, &map,
-                                                       PBF_WRITE | PBF_DIRECT);
+                                               size, &map,
+                                               BMAP_WRITE | BMAP_MMAP);
                                        if (err) {
                                                goto error;
                                        }
@@ -737,6 +729,7 @@ page_state_convert(
                                        } else {
                                                set_buffer_dirty(bh);
                                                unlock_buffer(bh);
+                                               mark_buffer_dirty(bh);
                                        }
                                        page_dirty = 0;
                                }
@@ -760,13 +753,11 @@ next_bh:
        if (uptodate)
                SetPageUptodate(page);
 
-       if (startio) {
+       if (startio)
                submit_page(page, bh_arr, cnt);
-       }
 
-       if (mp) {
+       if (mp)
                cluster_write(inode, page->index + 1, mp, startio, unmapped);
-       }
 
        return page_dirty;
 
@@ -797,7 +788,7 @@ linvfs_get_block_core(
        struct buffer_head      *bh_result,
        int                     create,
        int                     direct,
-       page_buf_flags_t        flags)
+       bmapi_flags_t           flags)
 {
        vnode_t                 *vp = LINVFS_GET_VP(inode);
        page_buf_bmap_t         pbmap;
@@ -817,7 +808,7 @@ linvfs_get_block_core(
                size = 1 << inode->i_blkbits;
 
        VOP_BMAP(vp, offset, size,
-               create ? flags : PBF_READ, &pbmap, &retpbbm, error);
+               create ? flags : BMAP_READ, &pbmap, &retpbbm, error);
        if (error)
                return -error;
 
@@ -887,7 +878,7 @@ linvfs_get_block(
        int                     create)
 {
        return linvfs_get_block_core(inode, iblock, 0, bh_result,
-                                       create, 0, PBF_WRITE);
+                                       create, 0, BMAP_WRITE);
 }
 
 STATIC int
@@ -898,7 +889,7 @@ linvfs_get_block_sync(
        int                     create)
 {
        return linvfs_get_block_core(inode, iblock, 0, bh_result,
-                                       create, 0, PBF_SYNC|PBF_WRITE);
+                                       create, 0, BMAP_SYNC|BMAP_WRITE);
 }
 
 STATIC int
@@ -910,7 +901,7 @@ linvfs_get_blocks_direct(
        int                     create)
 {
        return linvfs_get_block_core(inode, iblock, max_blocks, bh_result,
-                                       create, 1, PBF_WRITE|PBF_DIRECT);
+                                       create, 1, BMAP_WRITE|BMAP_DIRECT);
 }
 
 STATIC int
index 359af75131168d3b37ca63cca93c8ec2b435637b..0cd5c72698545439eb4aab63a57a67c9ee927505 100644 (file)
@@ -97,7 +97,7 @@ xfs_iomap(
 {
        xfs_mount_t     *mp = io->io_mount;
        xfs_fileoff_t   offset_fsb, end_fsb;
-       int             error;
+       int             error = 0;
        int             lockmode = 0;
        xfs_bmbt_irec_t imap;
        int             nimaps = 1;
@@ -107,32 +107,31 @@ xfs_iomap(
                return XFS_ERROR(EIO);
 
        switch (flags &
-               (PBF_READ|PBF_WRITE|PBF_FILE_ALLOCATE|PBF_FILE_UNWRITTEN)) {
-       case PBF_READ:
+               (BMAP_READ|BMAP_WRITE|BMAP_ALLOCATE|BMAP_UNWRITTEN)) {
+       case BMAP_READ:
                lockmode = XFS_LCK_MAP_SHARED(mp, io);
                bmap_flags = XFS_BMAPI_ENTIRE;
+               if (flags & BMAP_IGNSTATE)
+                       bmap_flags |= XFS_BMAPI_IGSTATE;
                break;
        case PBF_WRITE:
                lockmode = XFS_ILOCK_EXCL|XFS_EXTSIZE_WR;
                bmap_flags = 0;
                XFS_ILOCK(mp, io, lockmode);
                break;
-       case PBF_FILE_ALLOCATE:
+       case BMAP_ALLOCATE:
                lockmode = XFS_ILOCK_SHARED|XFS_EXTSIZE_RD;
                bmap_flags = XFS_BMAPI_ENTIRE;
                /* Attempt non-blocking lock */
-               if (flags & PBF_TRYLOCK) {
+               if (flags & BMAP_TRYLOCK) {
                        if (!XFS_ILOCK_NOWAIT(mp, io, lockmode))
                                return XFS_ERROR(EAGAIN);
                } else {
                        XFS_ILOCK(mp, io, lockmode);
                }
                break;
-       case PBF_FILE_UNWRITTEN:
-               lockmode = XFS_ILOCK_EXCL|XFS_EXTSIZE_WR;
-               bmap_flags = XFS_BMAPI_ENTIRE|XFS_BMAPI_IGSTATE;
-               XFS_ILOCK(mp, io, lockmode);
-               break;
+       case BMAP_UNWRITTEN:
+               goto phase2;
        default:
                BUG();
        }
@@ -148,13 +147,14 @@ xfs_iomap(
        if (error)
                goto out;
 
-       switch (flags & (PBF_WRITE|PBF_FILE_ALLOCATE)) {
-       case PBF_WRITE:
+phase2:
+       switch (flags & (BMAP_WRITE|BMAP_ALLOCATE|BMAP_UNWRITTEN)) {
+       case BMAP_WRITE:
                /* If we found an extent, return it */
                if (nimaps && (imap.br_startblock != HOLESTARTBLOCK))
                        break;
 
-               if (flags & PBF_DIRECT) {
+               if (flags & (BMAP_DIRECT|BMAP_MMAP)) {
                        error = XFS_IOMAP_WRITE_DIRECT(mp, io, offset,
                                        count, flags, &imap, &nimaps, nimaps);
                } else {
@@ -162,7 +162,7 @@ xfs_iomap(
                                        flags, &imap, &nimaps);
                }
                break;
-       case PBF_FILE_ALLOCATE:
+       case BMAP_ALLOCATE:
                /* If we found an extent, return it */
                XFS_IUNLOCK(mp, io, lockmode);
                lockmode = 0;
@@ -172,12 +172,17 @@ xfs_iomap(
 
                error = XFS_IOMAP_WRITE_ALLOCATE(mp, io, &imap, &nimaps);
                break;
+       case BMAP_UNWRITTEN:
+               lockmode = 0;
+               error = XFS_IOMAP_WRITE_UNWRITTEN(mp, io, offset, count);
+               nimaps = 0;
+               break;
        }
 
        if (nimaps) {
                *npbmaps = _xfs_imap_to_bmap(io, offset, &imap,
                                                pbmapp, nimaps, *npbmaps);
-       } else {
+       } else if (npbmaps) {
                *npbmaps = 0;
        }
 
@@ -203,13 +208,13 @@ xfs_flush_space(
                        xfs_ilock(ip, XFS_ILOCK_EXCL);
                        *fsynced = 1;
                } else {
-                       *ioflags |= PBF_SYNC;
+                       *ioflags |= BMAP_SYNC;
                        *fsynced = 2;
                }
                return 0;
        case 1:
                *fsynced = 2;
-               *ioflags |= PBF_SYNC;
+               *ioflags |= BMAP_SYNC;
                return 0;
        case 2:
                xfs_iunlock(ip, XFS_ILOCK_EXCL);
@@ -228,7 +233,7 @@ xfs_iomap_write_direct(
        xfs_inode_t     *ip,
        loff_t          offset,
        size_t          count,
-       int             ioflag,
+       int             flags,
        xfs_bmbt_irec_t *ret_imap,
        int             *nmaps,
        int             found)
@@ -342,7 +347,7 @@ xfs_iomap_write_direct(
        xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL);
        xfs_trans_ihold(tp, ip);
 
-       if (offset < ip->i_d.di_size || rt)
+       if (!(flags & BMAP_MMAP) && (offset < ip->i_d.di_size || rt))
                bmapi_flag |= XFS_BMAPI_PREALLOC;
 
        /*
@@ -441,7 +446,7 @@ retry:
         * We don't bother with this for sync writes, because we need
         * to minimize the amount we write for good performance.
         */
-       if (!(ioflag & PBF_SYNC) && ((offset + count) > ip->i_d.di_size)) {
+       if (!(ioflag & BMAP_SYNC) && ((offset + count) > ip->i_d.di_size)) {
                xfs_off_t       aligned_offset;
                unsigned int    iosize;
                xfs_fileoff_t   ioalign;
index c1b53bfb6967f2c8062a0853ef01e2938d4ca6bc..6b44090ddefc385b7ccabb01788231bfa424b8e7 100644 (file)
@@ -1575,6 +1575,13 @@ pagebuf_delwri_dequeue(
        spin_unlock(&pbd_delwrite_lock);
 }
 
+STATIC void
+pagebuf_runall_queues(
+       struct workqueue_struct *queue)
+{
+       flush_workqueue(queue);
+}
+
 /* Defines for pagebuf daemon */
 DECLARE_WAIT_QUEUE_HEAD(pbd_waitq);
 STATIC int force_flush;
@@ -1680,10 +1687,13 @@ pagebuf_delwri_flush(
        page_buf_t              *pb;
        struct list_head        *curr, *next, tmp;
        int                     pincount = 0;
+       int                     flush_cnt = 0;
 
        spin_lock(&pbd_delwrite_lock);
        INIT_LIST_HEAD(&tmp);
 
+       pagebuf_runall_queues(pagebuf_dataio_workqueue);
+
        list_for_each_safe(curr, next, &pbd_delwrite_queue) {
                pb = list_entry(curr, page_buf_t, pb_list);
 
@@ -1725,6 +1735,10 @@ pagebuf_delwri_flush(
                pb->pb_flags |= PBF_WRITE;
 
                __pagebuf_iorequest(pb);
+               if (++flush_cnt > 32) {
+                       pagebuf_run_queues(NULL);
+                       flush_cnt = 0;
+               }
 
                spin_lock(&pbd_delwrite_lock);
        }
index 4d400d0f65832573db5024d827eccfd5005efb2b..2a24ba8c6fb0dbf908f6f7d18feb972f1a673f0d 100644 (file)
@@ -83,6 +83,20 @@ typedef enum {                               /* pbm_flags values */
                                        /* but uninitialized file data  */
 } bmap_flags_t;
 
+typedef enum {
+       /* base extent manipulation calls */
+       BMAP_READ = (1 << 0),           /* read extents */
+       BMAP_WRITE = (1 << 1),          /* create extents */
+       BMAP_ALLOCATE = (1 << 2),       /* delayed allocate to real extents */
+       BMAP_UNWRITTEN  = (1 << 3),     /* unwritten extents to real extents */
+       /* modifiers */
+       BMAP_IGNSTATE = (1 << 4),       /* ignore unwritten state on read */
+       BMAP_DIRECT = (1 << 5),         /* direct instead of buffered write */
+       BMAP_MMAP = (1 << 6),           /* allocate for mmap write */
+       BMAP_SYNC = (1 << 7),           /* sync write */
+       BMAP_TRYLOCK = (1 << 8),        /* non-blocking request */
+} bmapi_flags_t;
+
 typedef enum page_buf_flags_e {                /* pb_flags values */
        PBF_READ = (1 << 0),    /* buffer intended for reading from device */
        PBF_WRITE = (1 << 1),   /* buffer intended for writing to device   */
@@ -101,20 +115,18 @@ typedef enum page_buf_flags_e {           /* pb_flags values */
        /* flags used only as arguments to access routines */
        PBF_LOCK = (1 << 13),   /* lock requested                          */
        PBF_TRYLOCK = (1 << 14), /* lock requested, but do not wait        */
-       PBF_FILE_ALLOCATE = (1 << 15), /* allocate all file space          */
-       PBF_DONT_BLOCK = (1 << 16), /* do not block in current thread      */
-       PBF_DIRECT = (1 << 17), /* direct I/O desired                      */
-       PBF_FILE_UNWRITTEN = (1 << 18), /* convert unwritten extent space  */
+       PBF_DONT_BLOCK = (1 << 15), /* do not block in current thread      */
 
        /* flags used only internally */
-       _PBF_LOCKABLE = (1 << 19), /* page_buf_t may be locked             */
-       _PBF_ALL_PAGES_MAPPED = (1 << 21), /* all pages in range mapped    */
-       _PBF_ADDR_ALLOCATED = (1 << 22), /* pb_addr space was allocated    */
-       _PBF_MEM_ALLOCATED = (1 << 23), /* pb_mem+underlying pages alloc'd */
-
-       PBF_FORCEIO = (1 << 24),
-       PBF_FLUSH = (1 << 25),  /* flush disk write cache                  */
-       PBF_READ_AHEAD = (1 << 26),
+       _PBF_LOCKABLE = (1 << 16), /* page_buf_t may be locked             */
+       _PBF_PRIVATE_BH = (1 << 17), /* do not use public buffer heads     */
+       _PBF_ALL_PAGES_MAPPED = (1 << 18), /* all pages in range mapped    */
+       _PBF_ADDR_ALLOCATED = (1 << 19), /* pb_addr space was allocated    */
+       _PBF_MEM_ALLOCATED = (1 << 20), /* pb_mem+underlying pages alloc'd */
+
+       PBF_FORCEIO = (1 << 21),
+       PBF_FLUSH = (1 << 22),  /* flush disk write cache                  */
+       PBF_READ_AHEAD = (1 << 23),
 
 } page_buf_flags_t;
 
index b77b2308cf007494243fa9ff5e201d706e3a2921..793ca4cab415acf40df9d522d8e561f727c47b60 100644 (file)
@@ -1732,11 +1732,9 @@ static char      *pb_flag_vals[] = {
 /*  0 */ "READ", "WRITE", "MAPPED", "PARTIAL", "ASYNC",
 /*  5 */ "NONE", "DELWRI", "FREED", "SYNC", "MAPPABLE",
 /* 10 */ "STALE", "FS_MANAGED", "INVALID12", "LOCK", "TRYLOCK",
-/* 15 */ "FILE_ALLOCATE", "DONT_BLOCK", "DIRECT", "INVALID18", "LOCKABLE",
-/* 20 */ "PRIVATE_BH", "ALL_PAGES_MAPPED", "ADDR_ALLOCATED", "MEM_ALLOCATED",
-        "FORCEIO",
-/* 25 */ "FLUSH", "READ_AHEAD", "INVALID27", "INVALID28", "INVALID29",
-/* 30 */ "INVALID30", "INVALID31",
+/* 15 */ "DONT_BLOCK", "LOCKABLE", "PRIVATE_BH", "ALL_PAGES_MAPPED", 
+        "ADDR_ALLOCATED",
+/* 20 */ "MEM_ALLOCATED", "FORCEIO", "FLUSH", "READ_AHEAD",
         NULL };
 
 static char    *pbm_flag_vals[] = {