]> git.neil.brown.name Git - history.git/commitdiff
[PATCH] md 17 of 22 - Strengthen the locking of mddev.
authorNeil Brown <neilb@cse.unsw.edu.au>
Tue, 18 Jun 2002 11:17:26 +0000 (04:17 -0700)
committerLinus Torvalds <torvalds@home.transmeta.com>
Tue, 18 Jun 2002 11:17:26 +0000 (04:17 -0700)
Strengthen the locking of mddev.

mddev is only ever locked in md.c, so we move {,un}lock_mddev
out of the header and into md.c, and rename to mddev_{,un}lock
for consistancy with mddev_{get,put,find}.

When building arrays (typically at boot time) we now lock, and unlock
as it is the "right" thing to do.  The lock should never fail.

When generating /proc/mdstat, we lock each array before inspecting it.

In md_ioctl, we lock the mddev early and unlock at the end, rather than
locking in two different places.

In md_open we make sure we can get a lock before completing the open.  This
ensures that we sync with do_md_stop properly.

In md_do_recovery, we lock each mddev before checking it's status.

md_do_recovery must unlock while recovery happens, and a do_md_stop at this
point will deadlock when md_do_recovery tries to regain the lock.  This will be
fixed in a later patch.

drivers/md/md.c
include/linux/raid/md_k.h

index 1bd0c4f8bb6ac1f32a127cb28fa2219a14efa651..c598fc38513a3ed86f3a462f7418389f55c0e62e 100644 (file)
@@ -185,6 +185,21 @@ static mddev_t * mddev_find(int unit)
        return mddev;
 }
 
+static inline int mddev_lock(mddev_t * mddev)
+{
+       return down_interruptible(&mddev->reconfig_sem);
+}
+
+static inline int mddev_trylock(mddev_t * mddev)
+{
+       return down_trylock(&mddev->reconfig_sem);
+}
+
+static inline void mddev_unlock(mddev_t * mddev)
+{
+       up(&mddev->reconfig_sem);
+}
+
 mdk_rdev_t * find_rdev_nr(mddev_t *mddev, int nr)
 {
        mdk_rdev_t * rdev;
@@ -1838,24 +1853,29 @@ static void autorun_devices(void)
                mddev = mddev_find(rdev0->sb->md_minor);
                if (!mddev) {
                        printk(KERN_ERR "md: cannot allocate memory for md drive.\n");
-                       ITERATE_RDEV_GENERIC(candidates,pending,rdev,tmp)
-                               export_rdev(rdev);
                        break;
                }
-               if (mddev->sb || !list_empty(&mddev->disks)) {
+               if (mddev_lock(mddev)) 
+                       printk(KERN_WARNING "md: md%d locked, cannot run\n",
+                              mdidx(mddev));
+               else if (mddev->sb || !list_empty(&mddev->disks)) {
                        printk(KERN_WARNING "md: md%d already running, cannot run %s\n",
                               mdidx(mddev), partition_name(rdev0->dev));
-                       ITERATE_RDEV_GENERIC(candidates,pending,rdev,tmp)
-                               export_rdev(rdev);
-                       mddev_put(mddev);
-                       continue;
-               }
-               printk(KERN_INFO "md: created md%d\n", mdidx(mddev));
-               ITERATE_RDEV_GENERIC(candidates,pending,rdev,tmp) {
-                       bind_rdev_to_array(rdev, mddev);
-                       list_del_init(&rdev->pending);
+                       mddev_unlock(mddev);
+               } else {
+                       printk(KERN_INFO "md: created md%d\n", mdidx(mddev));
+                       ITERATE_RDEV_GENERIC(candidates,pending,rdev,tmp) {
+                               bind_rdev_to_array(rdev, mddev);
+                               list_del_init(&rdev->pending);
+                       }
+                       autorun_array(mddev);
+                       mddev_unlock(mddev);
                }
-               autorun_array(mddev);
+               /* on success, candidates will be empty, on error
+                * it wont...
+                */
+               ITERATE_RDEV_GENERIC(candidates,pending,rdev,tmp)
+                       export_rdev(rdev);
                mddev_put(mddev);
        }
        printk(KERN_INFO "md: ... autorun DONE.\n");
@@ -2453,7 +2473,7 @@ static int md_ioctl(struct inode *inode, struct file *file,
                case PRINT_RAID_DEBUG:
                        err = 0;
                        md_print_devices();
-                       goto done_unlock;
+                       goto done;
 
 #ifndef MODULE
                case RAID_AUTORUN:
@@ -2497,19 +2517,17 @@ static int md_ioctl(struct inode *inode, struct file *file,
                goto abort;
        }
 
+       err = mddev_lock(mddev);
+       if (err) {
+               printk(KERN_INFO "md: ioctl lock interrupted, reason %d, cmd %d\n",
+                      err, cmd);
+               goto abort;
+       }
+
        switch (cmd)
        {
                case SET_ARRAY_INFO:
 
-                       /*
-                        * alloc_mddev() should possibly self-lock.
-                        */
-                       err = lock_mddev(mddev);
-                       if (err) {
-                               printk(KERN_WARNING "md: ioctl, reason %d, cmd %d\n",
-                                      err, cmd);
-                               goto abort;
-                       }
                        if (!list_empty(&mddev->disks)) {
                                printk(KERN_WARNING "md: array md%d already has disks!\n",
                                        mdidx(mddev));
@@ -2544,9 +2562,9 @@ static int md_ioctl(struct inode *inode, struct file *file,
                        if (err) {
                                printk(KERN_WARNING "md: autostart %s failed!\n",
                                        partition_name(val_to_kdev(arg)));
-                               goto abort;
+                               goto abort_unlock;
                        }
-                       goto done;
+                       goto done_unlock;
 
                default:;
        }
@@ -2554,12 +2572,6 @@ static int md_ioctl(struct inode *inode, struct file *file,
        /*
         * Commands querying/configuring an existing array:
         */
-
-       err = lock_mddev(mddev);
-       if (err) {
-               printk(KERN_INFO "md: ioctl lock interrupted, reason %d, cmd %d\n",err, cmd);
-               goto abort;
-       }
        /* if we don't have a superblock yet, only ADD_NEW_DISK or STOP_ARRAY is allowed */
        if (!mddev->sb && cmd != ADD_NEW_DISK && cmd != STOP_ARRAY && cmd != RUN_ARRAY) {
                err = -ENODEV;
@@ -2678,7 +2690,7 @@ static int md_ioctl(struct inode *inode, struct file *file,
 
 done_unlock:
 abort_unlock:
-       unlock_mddev(mddev);
+       mddev_unlock(mddev);
 
        return err;
 done:
@@ -2694,12 +2706,21 @@ static int md_open(struct inode *inode, struct file *file)
         * Succeed if we can find or allocate a mddev structure.
         */
        mddev_t *mddev = mddev_find(minor(inode->i_rdev));
+       int err = -ENOMEM;
 
-       if (mddev) {
-               inode->i_bdev->bd_inode->u.generic_ip = mddev;
-               return 0; /* and we "own" a reference */
-       } else
-               return -ENOMEM;
+       if (!mddev)
+               goto out;
+
+       if ((err = mddev_lock(mddev)))
+               goto put;
+
+       err = 0;
+       mddev_unlock(mddev);
+       inode->i_bdev->bd_inode->u.generic_ip = mddev_get(mddev);
+ put:
+       mddev_put(mddev);
+ out:
+       return err;
 }
 
 static int md_release(struct inode *inode, struct file * file)
@@ -2987,7 +3008,7 @@ static int md_status_read_proc(char *page, char **start, off_t off,
 
        sz += sprintf(page+sz, "\n");
 
-       ITERATE_MDDEV(mddev,tmp) {
+       ITERATE_MDDEV(mddev,tmp) if (mddev_lock(mddev)==0) {
                sz += sprintf(page + sz, "md%d : %sactive", mdidx(mddev),
                                                mddev->pers ? "" : "in");
                if (mddev->pers) {
@@ -3017,6 +3038,7 @@ static int md_status_read_proc(char *page, char **start, off_t off,
 
                if (!mddev->pers) {
                        sz += sprintf(page+sz, "\n");
+                       mddev_unlock(mddev);
                        continue;
                }
 
@@ -3030,6 +3052,7 @@ static int md_status_read_proc(char *page, char **start, off_t off,
                                sz += sprintf(page + sz, "      resync=DELAYED");
                }
                sz += sprintf(page + sz, "\n");
+               mddev_unlock(mddev);
        }
        sz += status_unused(page + sz);
 
@@ -3306,36 +3329,38 @@ void md_do_recovery(void *data)
        mdp_disk_t *spare;
        struct list_head *tmp;
 
-       printk(KERN_INFO "md: recovery thread got woken up ...\n");
-restart:
-       ITERATE_MDDEV(mddev,tmp) {
+       dprintk(KERN_INFO "md: recovery thread got woken up ...\n");
+
+       ITERATE_MDDEV(mddev,tmp) if (mddev_lock(mddev)==0) {
                sb = mddev->sb;
                if (!sb)
-                       continue;
+                       goto unlock;
                if (mddev->recovery_running)
-                       continue;
+                       goto unlock;
                if (sb->active_disks == sb->raid_disks)
-                       continue;
+                       goto unlock;
                if (!sb->spare_disks) {
                        printk(KERN_ERR "md%d: no spare disk to reconstruct array! "
                               "-- continuing in degraded mode\n", mdidx(mddev));
-                       continue;
+                       goto unlock;
                }
                /*
                 * now here we get the spare and resync it.
                 */
                spare = get_spare(mddev);
                if (!spare)
-                       continue;
+                       goto unlock;
                printk(KERN_INFO "md%d: resyncing spare disk %s to replace failed disk\n",
                       mdidx(mddev), partition_name(mk_kdev(spare->major,spare->minor)));
                if (!mddev->pers->diskop)
-                       continue;
+                       goto unlock;
                if (mddev->pers->diskop(mddev, &spare, DISKOP_SPARE_WRITE))
-                       continue;
+                       goto unlock;
                down(&mddev->recovery_sem);
                mddev->recovery_running = 1;
+               mddev_unlock(mddev);
                err = md_do_sync(mddev, spare);
+               mddev_lock(mddev); /* FIXME this can fail or deadlock with do_md_close */
                if (err == -EIO) {
                        printk(KERN_INFO "md%d: spare disk %s failed, skipping to next spare.\n",
                               mdidx(mddev), partition_name(mk_kdev(spare->major,spare->minor)));
@@ -3361,7 +3386,7 @@ restart:
                                                         DISKOP_SPARE_INACTIVE);
                        up(&mddev->recovery_sem);
                        mddev->recovery_running = 0;
-                       continue;
+                       goto unlock;
                } else {
                        mddev->recovery_running = 0;
                        up(&mddev->recovery_sem);
@@ -3379,9 +3404,10 @@ restart:
                }
                mddev->sb_dirty = 1;
                md_update_sb(mddev);
-               goto restart;
+       unlock:
+               mddev_unlock(mddev);
        }
-       printk(KERN_INFO "md: recovery thread finished ...\n");
+       dprintk(KERN_INFO "md: recovery thread finished ...\n");
 
 }
 
@@ -3397,7 +3423,8 @@ int md_notify_reboot(struct notifier_block *this,
                return NOTIFY_DONE;
 
                ITERATE_MDDEV(mddev,tmp)
-                       do_md_stop (mddev, 1);
+                       if (mddev_trylock(mddev)==0)
+                               do_md_stop (mddev, 1);
                /*
                 * certain more exotic SCSI devices are known to be
                 * volatile wrt too early system reboots. While the
@@ -3693,10 +3720,19 @@ void __init md_setup_drive(void)
                        printk(KERN_ERR "md: kmalloc failed - cannot start array %d\n", minor);
                        continue;
                }
+               if (mddev_lock(mddev)) {
+                       printk(KERN_WARNING
+                              "md: Ignoring md=%d, cannot lock!\n",
+                              minor);
+                       mddev_put(mddev);
+                       continue;
+               }
+
                if (mddev->sb || !list_empty(&mddev->disks)) {
                        printk(KERN_WARNING
                               "md: Ignoring md=%d, already autodetected. (Use raid=noautodetect)\n",
                               minor);
+                       mddev_unlock(mddev);
                        mddev_put(mddev);
                        continue;
                }
@@ -3751,6 +3787,7 @@ void __init md_setup_drive(void)
                        do_md_stop(mddev, 0);
                        printk(KERN_WARNING "md: starting md%d failed\n", minor);
                }
+               mddev_unlock(mddev);
                mddev_put(mddev);
        }
 }
index d29630ebec431f204b5e7c89336f143b1773db4d..9984edf26032531c0560891352f3791d1c83df98 100644 (file)
@@ -283,16 +283,6 @@ extern mdp_disk_t *get_spare(mddev_t *mddev);
                        tmp = tmp->next, tmp->prev != &all_mddevs       \
                ; )
 
-static inline int lock_mddev (mddev_t * mddev)
-{
-       return down_interruptible(&mddev->reconfig_sem);
-}
-
-static inline void unlock_mddev (mddev_t * mddev)
-{
-       up(&mddev->reconfig_sem);
-}
-
 #define xchg_values(x,y) do { __typeof__(x) __tmp = x; \
                                x = y; y = __tmp; } while (0)