]> git.neil.brown.name Git - history.git/commitdiff
[PATCH] md: Remove per-personality 'operational' and 'write_only' flags
authorNeil Brown <neilb@cse.unsw.edu.au>
Fri, 23 Aug 2002 04:27:25 +0000 (21:27 -0700)
committerLinus Torvalds <torvalds@home.transmeta.com>
Fri, 23 Aug 2002 04:27:25 +0000 (21:27 -0700)
raid1, raid5 and multipath maintain their own
'operational' flag.  This is equivalent to
   !rdev->faulty
and so isn't needed.
Similarly raid1 and raid5 maintain a "write_only" flag
that is equivalent to
   !rdev->in_sync
so it isn't needed either.

As part of implementing this change, we introduce some extra
flag bits in raid5 that are meaningful only inside 'handle_stripe'.
Some of these replace the "action" array which recorded what
actions were required (and would be performed after the stripe
spinlock was released).  This has the advantage of reducing our
dependence on MD_SB_DISKS which personalities shouldn't need
to know about.

drivers/md/md.c
drivers/md/multipath.c
drivers/md/raid1.c
drivers/md/raid5.c
include/linux/raid/md.h
include/linux/raid/md_k.h
include/linux/raid/multipath.h
include/linux/raid/raid1.h
include/linux/raid/raid5.h

index 4a47cb9ffbf1081355370d5e9bc494cae5fa6d62..4ac4806831008f733a66fe738d55234f2be2dcba 100644 (file)
@@ -365,9 +365,6 @@ static void free_disk_sb(mdk_rdev_t * rdev)
                rdev->sb_page = NULL;
                rdev->sb_offset = 0;
                rdev->size = 0;
-       } else {
-               if (!rdev->faulty)
-                       MD_BUG();
        }
 }
 
@@ -586,7 +583,6 @@ static void export_rdev(mdk_rdev_t * rdev)
        md_autodetect_dev(rdev->bdev->bd_dev);
 #endif
        unlock_rdev(rdev);
-       rdev->faulty = 0;
        kfree(rdev);
 }
 
@@ -671,9 +667,9 @@ static void print_sb(mdp_super_t *sb)
 
 static void print_rdev(mdk_rdev_t *rdev)
 {
-       printk(KERN_INFO "md: rdev %s, SZ:%08ld F:%d DN:%d ",
+       printk(KERN_INFO "md: rdev %s, SZ:%08ld F:%d S:%d DN:%d ",
                bdev_partition_name(rdev->bdev),
-               rdev->size, rdev->faulty, rdev->desc_nr);
+               rdev->size, rdev->faulty, rdev->in_sync, rdev->desc_nr);
        if (rdev->sb) {
                printk(KERN_INFO "md: rdev superblock:\n");
                print_sb(rdev->sb);
@@ -1006,6 +1002,7 @@ static mdk_rdev_t *md_import_device(dev_t newdev, int on_disk)
        }
        rdev->desc_nr = -1;
        rdev->faulty = 0;
+       rdev->in_sync = 0;
        atomic_set(&rdev->nr_pending, 0);
 
        size = rdev->bdev->bd_inode->i_size >> BLOCK_SIZE_BITS;
@@ -2182,14 +2179,13 @@ static int set_array_info(mddev_t * mddev, mdu_array_info_t *info)
 static int set_disk_faulty(mddev_t *mddev, dev_t dev)
 {
        mdk_rdev_t *rdev;
-       int ret;
 
        rdev = find_rdev(mddev, dev);
        if (!rdev)
                return 0;
 
-       ret = md_error(mddev, rdev);
-       return ret;
+       md_error(mddev, rdev);
+       return 1;
 }
 
 static int md_ioctl(struct inode *inode, struct file *file,
@@ -2604,9 +2600,8 @@ static void md_recover_arrays(void)
 }
 
 
-int md_error(mddev_t *mddev, mdk_rdev_t *rdev)
+void md_error(mddev_t *mddev, mdk_rdev_t *rdev)
 {
-
        dprintk("md_error dev:(%d:%d), rdev:(%d:%d), (caller: %p,%p,%p,%p).\n",
                MD_MAJOR,mdidx(mddev),MAJOR(bdev->bd_dev),MINOR(bdev->bd_dev),
                __builtin_return_address(0),__builtin_return_address(1),
@@ -2614,25 +2609,15 @@ int md_error(mddev_t *mddev, mdk_rdev_t *rdev)
 
        if (!mddev) {
                MD_BUG();
-               return 0;
+               return;
        }
 
        if (!rdev || rdev->faulty)
-               return 0;
-       if (!mddev->pers->error_handler
-                       || mddev->pers->error_handler(mddev,rdev) <= 0) {
-               rdev->faulty = 1;
-               rdev->in_sync = 0;
-       } else
-               return 1;
-       /*
-        * if recovery was running, stop it now.
-        */
-       if (mddev->recovery_running) 
-               mddev->recovery_running = -EIO;
+               return;
+       if (!mddev->pers->error_handler)
+               return;
+       mddev->pers->error_handler(mddev,rdev);
        md_recover_arrays();
-
-       return 0;
 }
 
 static int status_unused(char * page)
@@ -3510,7 +3495,7 @@ static int __init raid_setup(char *str)
        return 1;
 }
 
-int __init md_run_setup(void)
+static int __init md_run_setup(void)
 {
        if (raid_setup_args.noautodetect)
                printk(KERN_INFO "md: Skipping autodetection of RAID arrays. (raid=noautodetect)\n");
index a4545e09942336c4bdaec05f7b777b4cb355062c..6c50e6d29c8614a94c183e0a93bb6a8e3cf5f5ae 100644 (file)
@@ -70,7 +70,7 @@ static void mp_pool_free(void *mpb, void *data)
        kfree(mpb);
 }
 
-static int multipath_map (mddev_t *mddev, mdk_rdev_t **rdev)
+static int multipath_map (mddev_t *mddev, mdk_rdev_t **rdevp)
 {
        multipath_conf_t *conf = mddev_to_conf(mddev);
        int i, disks = MD_SB_DISKS;
@@ -82,10 +82,10 @@ static int multipath_map (mddev_t *mddev, mdk_rdev_t **rdev)
 
        spin_lock_irq(&conf->device_lock);
        for (i = 0; i < disks; i++) {
-               if (conf->multipaths[i].operational &&
-                       conf->multipaths[i].rdev) {
-                       *rdev = conf->multipaths[i].rdev;
-                       atomic_inc(&(*rdev)->nr_pending);
+               mdk_rdev_t *rdev = conf->multipaths[i].rdev;
+               if (rdev && rdev->in_sync) {
+                       *rdevp = rdev;
+                       atomic_inc(&rdev->nr_pending);
                        spin_unlock_irq(&conf->device_lock);
                        return 0;
                }
@@ -158,10 +158,11 @@ static int multipath_read_balance (multipath_conf_t *conf)
 {
        int disk;
 
-       for (disk = 0; disk < MD_SB_DISKS; disk++)      
-               if (conf->multipaths[disk].operational &&
-                       conf->multipaths[disk].rdev)
+       for (disk = 0; disk < MD_SB_DISKS; disk++) {
+               mdk_rdev_t *rdev = conf->multipaths[disk].rdev;
+               if (rdev && rdev->in_sync)
                        return disk;
+       }
        BUG();
        return 0;
 }
@@ -204,7 +205,8 @@ static int multipath_status (char *page, mddev_t *mddev)
                                                 conf->working_disks);
        for (i = 0; i < conf->raid_disks; i++)
                sz += sprintf (page+sz, "%s",
-                       conf->multipaths[i].operational ? "U" : "_");
+                              conf->multipaths[i].rdev && 
+                              conf->multipaths[i].rdev->in_sync ? "U" : "_");
        sz += sprintf (page+sz, "]");
        return sz;
 }
@@ -219,28 +221,13 @@ static int multipath_status (char *page, mddev_t *mddev)
 "multipath: IO failure on %s, disabling IO path. \n" \
 "      Operation continuing on %d IO paths.\n"
 
-static void mark_disk_bad (mddev_t *mddev, int failed)
-{
-       multipath_conf_t *conf = mddev_to_conf(mddev);
-       struct multipath_info *multipath = conf->multipaths+failed;
-
-       multipath->operational = 0;
-       mddev->sb_dirty = 1;
-       conf->working_disks--;
-       printk (DISK_FAILED, bdev_partition_name (multipath->rdev->bdev),
-                                conf->working_disks);
-}
 
 /*
  * Careful, this can execute in IRQ contexts as well!
  */
-static int multipath_error (mddev_t *mddev, mdk_rdev_t *rdev)
+static void multipath_error (mddev_t *mddev, mdk_rdev_t *rdev)
 {
        multipath_conf_t *conf = mddev_to_conf(mddev);
-       struct multipath_info * multipaths = conf->multipaths;
-       int disks = MD_SB_DISKS;
-       int i;
-
 
        if (conf->working_disks <= 1) {
                /*
@@ -248,24 +235,21 @@ static int multipath_error (mddev_t *mddev, mdk_rdev_t *rdev)
                 * first check if this is a queued request for a device
                 * which has just failed.
                 */
-               for (i = 0; i < disks; i++) {
-                       if (multipaths[i].rdev == rdev && !multipaths[i].operational)
-                               return 0;
-               }
                printk (LAST_DISK);
-               return 1; /* leave it active... it's all we have */
+               /* leave it active... it's all we have */
        } else {
                /*
                 * Mark disk as unusable
                 */
-               for (i = 0; i < disks; i++) {
-                       if (multipaths[i].rdev == rdev && multipaths[i].operational) {
-                               mark_disk_bad(mddev, i);
-                               break;
-                       }
+               if (!rdev->faulty) {
+                       rdev->in_sync = 0;
+                       rdev->faulty = 1;
+                       mddev->sb_dirty = 1;
+                       conf->working_disks--;
+                       printk (DISK_FAILED, bdev_partition_name (rdev->bdev),
+                               conf->working_disks);
                }
        }
-       return 0;
 }
 
 #undef LAST_DISK
@@ -290,7 +274,7 @@ static void print_multipath_conf (multipath_conf_t *conf)
                tmp = conf->multipaths + i;
                if (tmp->rdev)
                        printk(" disk%d, o:%d, dev:%s\n",
-                               i,tmp->operational,
+                               i,!tmp->rdev->faulty,
                               bdev_partition_name(tmp->rdev->bdev));
        }
 }
@@ -308,7 +292,6 @@ static int multipath_add_disk(mddev_t *mddev, mdk_rdev_t *rdev)
        for (path=0; path<mddev->raid_disks; path++) 
                if ((p=conf->multipaths+path)->rdev == NULL) {
                        p->rdev = rdev;
-                       p->operational = 1;
                        conf->working_disks++;
                        rdev->raid_disk = path;
                        found = 1;
@@ -329,8 +312,8 @@ static int multipath_remove_disk(mddev_t *mddev, int number)
        spin_lock_irq(&conf->device_lock);
 
        if (p->rdev) {
-               if (p->operational ||
-                   (p->rdev && atomic_read(&p->rdev->nr_pending))) {
+               if (p->rdev->in_sync ||
+                   atomic_read(&p->rdev->nr_pending)) {
                        printk(KERN_ERR "hot-remove-disk, slot %d is identified but is still operational!\n", number);
                        err = -EBUSY;
                        goto abort;
@@ -474,18 +457,8 @@ static int multipath_run (mddev_t *mddev)
 
                disk = conf->multipaths + disk_idx;
                disk->rdev = rdev;
-               if (rdev->faulty) 
-                       disk->operational = 0;
-               else {
-
-                       /*
-                        * Mark all disks as active to start with, there are no
-                        * spares.  multipath_read_balance deals with choose
-                        * the "best" operational device.
-                        */
-                       disk->operational = 1;
+               if (!rdev->faulty) 
                        conf->working_disks++;
-               }
        }
 
        conf->raid_disks = mddev->raid_disks;
index 88a0e42618a321222c35b9aebeb7120a7513a69e..adc08135cbfa60410c551780b1579fc47bf7e643 100644 (file)
@@ -188,7 +188,7 @@ static inline void put_buf(r1bio_t *r1_bio)
        mempool_free(r1_bio, conf->r1buf_pool);
 }
 
-static int map(mddev_t *mddev, mdk_rdev_t **rdev)
+static int map(mddev_t *mddev, mdk_rdev_t **rdevp)
 {
        conf_t *conf = mddev_to_conf(mddev);
        int i, disks = conf->raid_disks;
@@ -200,11 +200,10 @@ static int map(mddev_t *mddev, mdk_rdev_t **rdev)
 
        spin_lock_irq(&conf->device_lock);
        for (i = 0; i < disks; i++) {
-               if (conf->mirrors[i].operational
-                   && !conf->mirrors[i].write_only
-                   && conf->mirrors[i].rdev) {
-                       *rdev = conf->mirrors[i].rdev;
-                       atomic_inc(&(*rdev)->nr_pending);
+               mdk_rdev_t *rdev = conf->mirrors[i].rdev;
+               if (rdev && rdev->in_sync) {
+                       *rdevp = rdev;
+                       atomic_inc(&rdev->nr_pending);
                        spin_unlock_irq(&conf->device_lock);
                        return 0;
                }
@@ -346,7 +345,9 @@ static int read_balance(conf_t *conf, struct bio *bio, r1bio_t *r1_bio)
        if (!conf->mddev->in_sync && (this_sector + sectors >= conf->next_resync)) {
                /* make sure that disk is operational */
                new_disk = 0;
-               while (!conf->mirrors[new_disk].operational || conf->mirrors[new_disk].write_only) {
+
+               while (!conf->mirrors[new_disk].rdev ||
+                      !conf->mirrors[new_disk].rdev->in_sync) {
                        new_disk++;
                        if (new_disk == conf->raid_disks) {
                                new_disk = 0;
@@ -358,7 +359,8 @@ static int read_balance(conf_t *conf, struct bio *bio, r1bio_t *r1_bio)
 
 
        /* make sure the disk is operational */
-       while (!conf->mirrors[new_disk].operational || conf->mirrors[new_disk].write_only) {
+       while (!conf->mirrors[new_disk].rdev ||
+              !conf->mirrors[new_disk].rdev->in_sync) {
                if (new_disk <= 0)
                        new_disk = conf->raid_disks;
                new_disk--;
@@ -387,8 +389,8 @@ static int read_balance(conf_t *conf, struct bio *bio, r1bio_t *r1_bio)
                        disk = conf->raid_disks;
                disk--;
 
-               if (conf->mirrors[disk].write_only ||
-                   !conf->mirrors[disk].operational)
+               if (!conf->mirrors[disk].rdev ||
+                   !conf->mirrors[disk].rdev->in_sync)
                        continue;
 
                if (!atomic_read(&conf->mirrors[disk].rdev->nr_pending)) {
@@ -509,8 +511,8 @@ static int make_request(request_queue_t *q, struct bio * bio)
         */
        spin_lock_irq(&conf->device_lock);
        for (i = 0;  i < disks; i++) {
-               if (conf->mirrors[i].operational &&
-                   conf->mirrors[i].rdev) {
+               if (conf->mirrors[i].rdev &&
+                   !conf->mirrors[i].rdev->faulty) {
                        atomic_inc(&conf->mirrors[i].rdev->nr_pending);
                        r1_bio->write_bios[i] = bio;
                } else
@@ -573,7 +575,8 @@ static int status(char *page, mddev_t *mddev)
                                                conf->working_disks);
        for (i = 0; i < conf->raid_disks; i++)
                sz += sprintf(page+sz, "%s",
-                       conf->mirrors[i].operational ? "U" : "_");
+                             conf->mirrors[i].rdev &&
+                             conf->mirrors[i].rdev->in_sync ? "U" : "_");
        sz += sprintf (page+sz, "]");
        return sz;
 }
@@ -594,49 +597,37 @@ static int status(char *page, mddev_t *mddev)
 #define ALREADY_SYNCING KERN_INFO \
 "raid1: syncing already in progress.\n"
 
-static void mark_disk_bad(mddev_t *mddev, int failed)
-{
-       conf_t *conf = mddev_to_conf(mddev);
-       mirror_info_t *mirror = conf->mirrors+failed;
-
-       mirror->operational = 0;
-       if (!mirror->write_only) {
-               mddev->degraded++;
-               conf->working_disks--;
-       }
-       mddev->sb_dirty = 1;
-       printk(DISK_FAILED, bdev_partition_name(mirror->rdev->bdev), conf->working_disks);
-}
 
-static int error(mddev_t *mddev, mdk_rdev_t *rdev)
+static void error(mddev_t *mddev, mdk_rdev_t *rdev)
 {
        conf_t *conf = mddev_to_conf(mddev);
-       mirror_info_t * mirrors = conf->mirrors;
-       int disks = conf->raid_disks;
-       int i;
 
        /*
-        * Find the drive.
         * If it is not operational, then we have already marked it as dead
         * else if it is the last working disks, ignore the error, let the
         * next level up know.
         * else mark the drive as failed
         */
-       for (i = 0; i < disks; i++)
-               if (mirrors[i].operational && mirrors[i].rdev == rdev)
-                       break;
-       if (i == disks)
-               return 0;
-
-       if (mirrors[i].operational && !mirrors[i].write_only
+       if (rdev->in_sync
            && conf->working_disks == 1)
                /*
                 * Don't fail the drive, act as though we were just a
                 * normal single drive
                 */
-               return 1;
-       mark_disk_bad(mddev, i);
-       return 0;
+               return;
+       if (rdev->in_sync) {
+               mddev->degraded++;
+               conf->working_disks--;
+               /*
+                * if recovery was running, stop it now.
+                */
+               if (mddev->recovery_running) 
+                       mddev->recovery_running = -EIO;
+       }
+       rdev->in_sync = 0;
+       rdev->faulty = 1;
+       mddev->sb_dirty = 1;
+       printk(DISK_FAILED, bdev_partition_name(rdev->bdev), conf->working_disks);
 }
 
 static void print_conf(conf_t *conf)
@@ -656,7 +647,7 @@ static void print_conf(conf_t *conf)
                tmp = conf->mirrors + i;
                if (tmp->rdev)
                        printk(" disk %d, wo:%d, o:%d, dev:%s\n",
-                              i, tmp->write_only, tmp->operational,
+                              i, !tmp->rdev->in_sync, !tmp->rdev->faulty,
                               bdev_partition_name(tmp->rdev->bdev));
        }
 }
@@ -688,12 +679,11 @@ static int raid1_spare_active(mddev_t *mddev)
         */
        for (i = 0; i < conf->raid_disks; i++) {
                tmp = conf->mirrors + i;
-               if (tmp->operational && tmp->rdev 
+               if (tmp->rdev 
                    && !tmp->rdev->faulty
-                   && tmp->write_only) {
+                   && !tmp->rdev->in_sync) {
                        conf->working_disks++;
                        mddev->degraded--;
-                       tmp->write_only = 0;
                        tmp->rdev->in_sync = 1;
                }
        }
@@ -715,8 +705,6 @@ static int raid1_add_disk(mddev_t *mddev, mdk_rdev_t *rdev)
        for (mirror=0; mirror < mddev->raid_disks; mirror++)
                if ( !(p=conf->mirrors+mirror)->rdev) {
                        p->rdev = rdev;
-                       p->write_only = 1;
-                       p->operational = 1;
                        p->head_position = 0;
                        rdev->raid_disk = mirror;
                        found = 1;
@@ -737,8 +725,8 @@ static int raid1_remove_disk(mddev_t *mddev, int number)
        print_conf(conf);
        spin_lock_irq(&conf->device_lock);
        if (p->rdev) {
-               if (p->operational ||
-                       (p->rdev && atomic_read(&p->rdev->nr_pending))) {
+               if (p->rdev->in_sync ||
+                   atomic_read(&p->rdev->nr_pending)) {
                        err = -EBUSY;
                        goto abort;
                }
@@ -837,20 +825,19 @@ static void sync_request_write(mddev_t *mddev, r1bio_t *r1_bio)
        spin_lock_irq(&conf->device_lock);
        for (i = 0; i < disks ; i++) {
                r1_bio->write_bios[i] = NULL;
-               if (!conf->mirrors[i].operational)
+               if (!conf->mirrors[i].rdev || 
+                   conf->mirrors[i].rdev->faulty)
                        continue;
                if (i == conf->last_used)
                        /*
                         * we read from here, no need to write
                         */
                        continue;
-               if (!conf->mirrors[i].write_only && mddev->in_sync)
+               if (conf->mirrors[i].rdev->in_sync && mddev->in_sync)
                        /*
                         * don't need to write this we are just rebuilding
                         */
                        continue;
-               if (!conf->mirrors[i].rdev)
-                       continue;
                atomic_inc(&conf->mirrors[i].rdev->nr_pending);
                r1_bio->write_bios[i] = bio;
        }
@@ -1009,9 +996,8 @@ static int sync_request(mddev_t *mddev, sector_t sector_nr, int go_faster)
        disk = conf->last_used;
        /* make sure disk is operational */
        spin_lock_irq(&conf->device_lock);
-       while (!conf->mirrors[disk].operational ||
-              conf->mirrors[disk].write_only ||
-               !conf->mirrors[disk].rdev) {
+       while (conf->mirrors[disk].rdev == NULL ||
+              !conf->mirrors[disk].rdev->in_sync) {
                if (disk <= 0)
                        disk = conf->raid_disks;
                disk--;
@@ -1149,8 +1135,6 @@ static int run(mddev_t *mddev)
                disk = conf->mirrors + disk_idx;
 
                disk->rdev = rdev;
-               disk->operational = ! rdev->faulty;
-               disk->write_only = ! rdev->in_sync;
                disk->head_position = 0;
                if (!rdev->faulty && rdev->in_sync)
                        conf->working_disks++;
@@ -1174,8 +1158,6 @@ static int run(mddev_t *mddev)
                disk = conf->mirrors + i;
 
                if (!disk->rdev) {
-                       disk->operational = 0;
-                       disk->write_only = 0;
                        disk->head_position = 0;
                        mddev->degraded++;
                }
@@ -1186,8 +1168,8 @@ static int run(mddev_t *mddev)
         * to read balancing.
         */
        for (j = 0; j < conf->raid_disks &&
-                    (!conf->mirrors[j].operational ||
-                     conf->mirrors[j].write_only) ; j++)
+                    (!conf->mirrors[j].rdev ||
+                     !conf->mirrors[j].rdev->in_sync) ; j++)
                /* nothing */;
        conf->last_used = j;
 
index bb30999ca356224ede273b5ed46016262002ed02..15b7a9c82192dba8faba88533a8671f2af8bba97 100644 (file)
@@ -440,33 +440,30 @@ static void raid5_build_block (struct stripe_head *sh, int i)
                dev->sector = compute_blocknr(sh, i);
 }
 
-static int error(mddev_t *mddev, mdk_rdev_t *rdev)
+static void error(mddev_t *mddev, mdk_rdev_t *rdev)
 {
        raid5_conf_t *conf = (raid5_conf_t *) mddev->private;
-       struct disk_info *disk;
-       int i;
-
        PRINTK("raid5: error called\n");
 
-       for (i = 0, disk = conf->disks; i < conf->raid_disks; i++, disk++) {
-               if (disk->rdev != rdev)
-                       continue;
-               if (disk->operational) {
-                       disk->operational = 0;
-                       mddev->sb_dirty = 1;
-                       conf->working_disks--;
-                       if (!disk->write_only) {
-                               mddev->degraded++;
-                               conf->failed_disks++;
-                       }
-                       printk (KERN_ALERT
-                               "raid5: Disk failure on %s, disabling device."
-                               " Operation continuing on %d devices\n",
-                               bdev_partition_name(rdev->bdev), conf->working_disks);
+       if (!rdev->faulty) {
+               mddev->sb_dirty = 1;
+               conf->working_disks--;
+               if (rdev->in_sync) {
+                       mddev->degraded++;
+                       conf->failed_disks++;
+                       rdev->in_sync = 0;
+                       /*
+                        * if recovery was running, stop it now.
+                        */
+                       if (mddev->recovery_running) 
+                               mddev->recovery_running = -EIO;
                }
-               return 0;
+               rdev->faulty = 1;
+               printk (KERN_ALERT
+                       "raid5: Disk failure on %s, disabling device."
+                       " Operation continuing on %d devices\n",
+                       bdev_partition_name(rdev->bdev), conf->working_disks);
        }
-       return -EIO;
 }      
 
 /*
@@ -820,7 +817,6 @@ static void handle_stripe(struct stripe_head *sh)
        int disks = conf->raid_disks;
        struct bio *return_bi= NULL;
        struct bio *bi;
-       int action[MD_SB_DISKS];
        int i;
        int syncing;
        int locked=0, uptodate=0, to_read=0, to_write=0, failed=0, written=0;
@@ -828,7 +824,6 @@ static void handle_stripe(struct stripe_head *sh)
        struct r5dev *dev;
 
        PRINTK("handling stripe %ld, cnt=%d, pd_idx=%d\n", sh->sector, atomic_read(&sh->count), sh->pd_idx);
-       memset(action, 0, sizeof(action));
 
        spin_lock(&sh->lock);
        clear_bit(STRIPE_HANDLE, &sh->state);
@@ -838,7 +833,13 @@ static void handle_stripe(struct stripe_head *sh)
        /* Now to look around and see what can be done */
 
        for (i=disks; i--; ) {
+               mdk_rdev_t *rdev;
                dev = &sh->dev[i];
+               clear_bit(R5_Wantread, &dev->flags);
+               clear_bit(R5_Wantwrite, &dev->flags);
+               clear_bit(R5_Insync, &dev->flags);
+               clear_bit(R5_Syncio, &dev->flags);
+
                PRINTK("check %d: state 0x%lx read %p write %p written %p\n", i, 
                       dev->flags, dev->toread, dev->towrite, dev->written);
                /* maybe we can reply to a read */
@@ -870,10 +871,12 @@ static void handle_stripe(struct stripe_head *sh)
                if (dev->toread) to_read++;
                if (dev->towrite) to_write++;
                if (dev->written) written++;
-               if (!conf->disks[i].operational || conf->disks[i].write_only) {
+               rdev = conf->disks[i].rdev; /* FIXME, should I be looking rdev */
+               if (!rdev || !rdev->in_sync) {
                        failed++;
                        failed_num = i;
-               }
+               } else
+                       set_bit(R5_Insync, &dev->flags);
        }
        PRINTK("locked=%d uptodate=%d to_read=%d to_write=%d failed=%d failed_num=%d\n",
               locked, uptodate, to_read, to_write, failed, failed_num);
@@ -898,7 +901,7 @@ static void handle_stripe(struct stripe_head *sh)
                                bi = nextbi;
                        }
                        /* fail any reads if this device is non-operational */
-                       if (!conf->disks[i].operational || conf->disks[i].write_only) {
+                       if (!test_bit(R5_Insync, &sh->dev[i].flags)) {
                                bi = sh->dev[i].toread;
                                sh->dev[i].toread = NULL;
                                if (bi) to_read--;
@@ -926,7 +929,7 @@ static void handle_stripe(struct stripe_head *sh)
         */
        dev = &sh->dev[sh->pd_idx];
        if ( written &&
-            ( (conf->disks[sh->pd_idx].operational && !conf->disks[sh->pd_idx].write_only && !test_bit(R5_LOCKED, &dev->flags) &&
+            ( (test_bit(R5_Insync, &dev->flags) && !test_bit(R5_LOCKED, &dev->flags) &&
                test_bit(R5_UPTODATE, &dev->flags))
               || (failed == 1 && failed_num == sh->pd_idx))
            ) {
@@ -934,7 +937,7 @@ static void handle_stripe(struct stripe_head *sh)
            for (i=disks; i--; )
                if (sh->dev[i].written) {
                    dev = &sh->dev[i];
-                   if (!conf->disks[sh->pd_idx].operational || conf->disks[sh->pd_idx].write_only ||
+                   if (!test_bit(R5_Insync, &dev->flags) &&
                        (!test_bit(R5_LOCKED, &dev->flags) && test_bit(R5_UPTODATE, &dev->flags)) ) {
                        /* maybe we can return some write requests */
                            struct bio *wbi, *wbi2;
@@ -968,9 +971,9 @@ static void handle_stripe(struct stripe_head *sh)
                                        PRINTK("Computing block %d\n", i);
                                        compute_block(sh, i);
                                        uptodate++;
-                               } else if (conf->disks[i].operational && !conf->disks[i].write_only) {
+                               } else if (test_bit(R5_Insync, &dev->flags)) {
                                        set_bit(R5_LOCKED, &dev->flags);
-                                       action[i] = READ+1;
+                                       set_bit(R5_Wantread, &dev->flags);
 #if 0
                                        /* if I am just reading this block and we don't have
                                           a failed drive, or any pending writes then sidestep the cache */
@@ -1003,7 +1006,7 @@ static void handle_stripe(struct stripe_head *sh)
 #endif
                                    ) &&
                            !test_bit(R5_UPTODATE, &dev->flags)) {
-                               if (conf->disks[i].operational  && !conf->disks[i].write_only
+                               if (test_bit(R5_Insync, &dev->flags)
 /*                                 && !(!mddev->insync && i == sh->pd_idx) */
                                        )
                                        rmw++;
@@ -1017,7 +1020,7 @@ static void handle_stripe(struct stripe_head *sh)
 #endif
                                    ) &&
                            !test_bit(R5_UPTODATE, &dev->flags)) {
-                               if (conf->disks[i].operational && !conf->disks[i].write_only) rcw++;
+                               if (test_bit(R5_Insync, &dev->flags)) rcw++;
                                else rcw += 2*disks;
                        }
                }
@@ -1029,12 +1032,12 @@ static void handle_stripe(struct stripe_head *sh)
                                dev = &sh->dev[i];
                                if ((dev->towrite || i == sh->pd_idx) &&
                                    !test_bit(R5_LOCKED, &dev->flags) && !test_bit(R5_UPTODATE, &dev->flags) &&
-                                   conf->disks[i].operational && !conf->disks[i].write_only) {
+                                   test_bit(R5_Insync, &dev->flags)) {
                                        if (test_bit(STRIPE_PREREAD_ACTIVE, &sh->state))
                                        {
                                                PRINTK("Read_old block %d for r-m-w\n", i);
                                                set_bit(R5_LOCKED, &dev->flags);
-                                               action[i] = READ+1;
+                                               set_bit(R5_Wantread, &dev->flags);
                                                locked++;
                                        } else {
                                                set_bit(STRIPE_DELAYED, &sh->state);
@@ -1048,12 +1051,12 @@ static void handle_stripe(struct stripe_head *sh)
                                dev = &sh->dev[i];
                                if (!test_bit(R5_OVERWRITE, &dev->flags) && i != sh->pd_idx &&
                                    !test_bit(R5_LOCKED, &dev->flags) && !test_bit(R5_UPTODATE, &dev->flags) &&
-                                   conf->disks[i].operational && !conf->disks[i].write_only) {
+                                   test_bit(R5_Insync, &dev->flags)) {
                                        if (test_bit(STRIPE_PREREAD_ACTIVE, &sh->state))
                                        {
                                                PRINTK("Read_old block %d for Reconstruct\n", i);
                                                set_bit(R5_LOCKED, &dev->flags);
-                                               action[i] = READ+1;
+                                               set_bit(R5_Wantread, &dev->flags);
                                                locked++;
                                        } else {
                                                set_bit(STRIPE_DELAYED, &sh->state);
@@ -1070,8 +1073,8 @@ static void handle_stripe(struct stripe_head *sh)
                                if (test_bit(R5_LOCKED, &sh->dev[i].flags)) {
                                        PRINTK("Writing block %d\n", i);
                                        locked++;
-                                       action[i] = WRITE+1;
-                                       if (!conf->disks[i].operational || conf->disks[i].write_only
+                                       set_bit(R5_Wantwrite, &sh->dev[i].flags);
+                                       if (!test_bit(R5_Insync, &sh->dev[i].flags)
                                            || (i==sh->pd_idx && failed == 0))
                                                set_bit(STRIPE_INSYNC, &sh->state);
                                }
@@ -1117,11 +1120,10 @@ static void handle_stripe(struct stripe_head *sh)
                                BUG();
                        dev = &sh->dev[failed_num];
                        set_bit(R5_LOCKED, &dev->flags);
-                       action[failed_num] = WRITE+1;
+                       set_bit(R5_Wantwrite, &dev->flags);
                        locked++;
                        set_bit(STRIPE_INSYNC, &sh->state);
-                       if (conf->disks[failed_num].operational)
-                               md_sync_acct(conf->disks[failed_num].rdev, STRIPE_SECTORS);
+                       set_bit(R5_Syncio, &dev->flags);
                }
        }
        if (syncing && locked == 0 && test_bit(STRIPE_INSYNC, &sh->state)) {
@@ -1137,32 +1139,34 @@ static void handle_stripe(struct stripe_head *sh)
                bi->bi_end_io(bi);
        }
        for (i=disks; i-- ;) 
-               if (action[i]) {
+               if (sh->dev[i].flags & ((1<<R5_Wantwrite)|(1<<R5_Wantread))) {
                        struct bio *bi = &sh->dev[i].req;
                        mdk_rdev_t *rdev ;
 
-                       if (action[i] == READ+1)
+                       bi->bi_rw = 0;
+                       if (test_bit(R5_Wantread, &sh->dev[i].flags))
                                bi->bi_end_io = raid5_end_read_request;
-                       else
+                       else {
                                bi->bi_end_io = raid5_end_write_request;
+                               bi->bi_rw = 1;
+                       }
 
                        spin_lock_irq(&conf->device_lock);
                        rdev = conf->disks[i].rdev;
-                       if (!conf->disks[i].operational)
+                       if (rdev && rdev->faulty)
                                rdev = NULL;
                        if (rdev)
                                atomic_inc(&rdev->nr_pending);
                        spin_unlock_irq(&conf->device_lock);
 
                        if (rdev) {
+                               if (test_bit(R5_Syncio, &sh->dev[i].flags))
+                                       md_sync_acct(rdev, STRIPE_SECTORS);
+
                                bi->bi_bdev = rdev->bdev;
-                               PRINTK("for %ld schedule op %d on disc %d\n", sh->sector, action[i]-1, i);
+                               PRINTK("for %ld schedule op %d on disc %d\n", sh->sector, bi->bi_rw, i);
                                atomic_inc(&sh->count);
                                bi->bi_sector = sh->sector;
-                               if (action[i] == READ+1) 
-                                       bi->bi_rw = 0;
-                               else
-                                       bi->bi_rw = 1;
                                bi->bi_flags = 0;
                                bi->bi_vcnt = 1;        
                                bi->bi_idx = 0;
@@ -1171,7 +1175,7 @@ static void handle_stripe(struct stripe_head *sh)
                                bi->bi_next = NULL;
                                generic_make_request(bi);
                        } else {
-                               PRINTK("skip op %d on disc %d for sector %ld\n", action[i]-1, i, sh->sector);
+                               PRINTK("skip op %d on disc %d for sector %ld\n", bi->bi_rw, i, sh->sector);
                                clear_bit(R5_LOCKED, &dev->flags);
                                set_bit(STRIPE_HANDLE, &sh->state);
                        }
@@ -1388,17 +1392,9 @@ static int run (mddev_t *mddev)
 
                disk->rdev = rdev;
 
-               if (rdev->faulty)
-                       disk->operational = 0;
-               else if (rdev->in_sync) {
+               if (rdev->in_sync) {
                        printk(KERN_INFO "raid5: device %s operational as raid disk %d\n", bdev_partition_name(rdev->bdev), raid_disk);
-       
-                       disk->operational = 1;
-                       disk->write_only = 0;
                        conf->working_disks++;
-               } else {
-                       disk->operational = 1;
-                       disk->write_only = 1;
                }
        }
 
@@ -1534,7 +1530,9 @@ static int status (char *page, mddev_t *mddev)
        sz += sprintf (page+sz, " level %d, %dk chunk, algorithm %d", mddev->level, mddev->chunk_size >> 10, mddev->layout);
        sz += sprintf (page+sz, " [%d/%d] [", conf->raid_disks, conf->working_disks);
        for (i = 0; i < conf->raid_disks; i++)
-               sz += sprintf (page+sz, "%s", conf->disks[i].operational ? "U" : "_");
+               sz += sprintf (page+sz, "%s",
+                              conf->disks[i].rdev &&
+                              conf->disks[i].rdev->in_sync ? "U" : "_");
        sz += sprintf (page+sz, "]");
 #if RAID5_DEBUG
 #define D(x) \
@@ -1561,7 +1559,7 @@ static void print_raid5_conf (raid5_conf_t *conf)
                tmp = conf->disks + i;
                if (tmp->rdev)
                printk(" disk %d, o:%d, dev:%s\n",
-                       i, tmp->operational,
+                       i, !tmp->rdev->faulty,
                        bdev_partition_name(tmp->rdev->bdev));
        }
 }
@@ -1575,10 +1573,9 @@ static int raid5_spare_active(mddev_t *mddev)
        spin_lock_irq(&conf->device_lock);
        for (i = 0; i < conf->raid_disks; i++) {
                tmp = conf->disks + i;
-               if (tmp->operational && tmp->rdev
+               if (tmp->rdev
                    && !tmp->rdev->faulty
-                   && tmp->write_only) {
-                       tmp->write_only = 0;
+                   && !tmp->rdev->in_sync) {
                        mddev->degraded--;
                        conf->failed_disks--;
                        conf->working_disks++;
@@ -1600,7 +1597,7 @@ static int raid5_remove_disk(mddev_t *mddev, int number)
        spin_lock_irq(&conf->device_lock);
 
        if (p->rdev) {
-               if (p->operational || 
+               if (p->rdev->in_sync || 
                    atomic_read(&p->rdev->nr_pending)) {
                        err = -EBUSY;
                        goto abort;
@@ -1630,8 +1627,7 @@ static int raid5_add_disk(mddev_t *mddev, mdk_rdev_t *rdev)
        for (disk=0; disk < mddev->raid_disks; disk++)
                if ((p=conf->disks + disk)->rdev == NULL) {
                        p->rdev = rdev;
-                       p->operational = 1;
-                       p->write_only = 1;
+                       rdev->in_sync = 0;
                        rdev->raid_disk = disk;
                        found = 1;
                        break;
index f2d44e5fcd0bfd368f2a4e6a74c124942a90613c..a9cca6e4da8faa7ab38e876bd50bd622c856b8a4 100644 (file)
@@ -77,8 +77,7 @@ extern void md_wakeup_thread(mdk_thread_t *thread);
 extern void md_interrupt_thread (mdk_thread_t *thread);
 extern void md_done_sync(mddev_t *mddev, int blocks, int ok);
 extern void md_sync_acct(mdk_rdev_t *rdev, unsigned long nr_sectors);
-extern int md_error (mddev_t *mddev, mdk_rdev_t *rdev);
-extern int md_run_setup(void);
+extern void md_error (mddev_t *mddev, mdk_rdev_t *rdev);
 
 extern void md_print_devices (void);
 
index 721aa5d478d0d0563c1e7f126ba560c23c539a0b..3c88b7882227d3464a22a13729f78bf62eb08776 100644 (file)
@@ -154,6 +154,16 @@ struct mdk_rdev_s
        mdp_super_t     *sb;
        unsigned long   sb_offset;
 
+       /* A device can be in one of three states based on two flags:
+        * Not working:   faulty==1 in_sync==0
+        * Fully working: faulty==0 in_sync==1
+        * Working, but not
+        * in sync with array
+        *                faulty==0 in_sync==0
+        *
+        * It can never have faulty==1, in_sync==1
+        * This reduces the burden of testing multiple flags in many cases
+        */
        int faulty;                     /* if faulty do not issue IO requests */
        int in_sync;                    /* device is a full member of the array */
 
@@ -227,7 +237,10 @@ struct mdk_personality_s
        int (*run)(mddev_t *mddev);
        int (*stop)(mddev_t *mddev);
        int (*status)(char *page, mddev_t *mddev);
-       int (*error_handler)(mddev_t *mddev, mdk_rdev_t *rdev);
+       /* error_handler must set ->faulty and clear ->in_sync
+        * if appropriate, and should abort recovery if needed 
+        */
+       void (*error_handler)(mddev_t *mddev, mdk_rdev_t *rdev);
        int (*hot_add_disk) (mddev_t *mddev, mdk_rdev_t *rdev);
        int (*hot_remove_disk) (mddev_t *mddev, int number);
        int (*spare_active) (mddev_t *mddev);
index f95a77eb298255034dd321b767745ae0ffdcebee..50db7f3c8c579b8bc5d4c12a8833e42887fc2638 100644 (file)
@@ -6,11 +6,6 @@
 
 struct multipath_info {
        mdk_rdev_t      *rdev;
-
-       /*
-        * State bits:
-        */
-       int             operational;
 };
 
 struct multipath_private_data {
index 7e7cf996d865ccc83231b12d1837b19933f1df7d..7095e77cb63eb2293544210396b6340c4a91edd1 100644 (file)
@@ -8,12 +8,6 @@ typedef struct mirror_info mirror_info_t;
 struct mirror_info {
        mdk_rdev_t      *rdev;
        sector_t        head_position;
-
-       /*
-        * State bits:
-        */
-       int             operational;
-       int             write_only;
 };
 
 typedef struct r1bio_s r1bio_t;
index 5e63e608b5eb6160d0ae30791bbb06ceeee0da95..9d08de50d13a50baa1260d78a97891530dbf7a46 100644 (file)
@@ -148,6 +148,11 @@ struct stripe_head {
 #define        R5_UPTODATE     0       /* page contains current data */
 #define        R5_LOCKED       1       /* IO has been submitted on "req" */
 #define        R5_OVERWRITE    2       /* towrite covers whole page */
+/* and some that are internal to handle_stripe */
+#define        R5_Insync       3       /* rdev && rdev->in_sync at start */
+#define        R5_Wantread     4       /* want to schedule a read */
+#define        R5_Wantwrite    5
+#define        R5_Syncio       6       /* this I/O needs to be accounted as resync I/O */
 
 /*
  * Write method
@@ -193,8 +198,6 @@ struct stripe_head {
 
 struct disk_info {
        mdk_rdev_t      *rdev;
-       int             operational;
-       int             write_only;
 };
 
 struct raid5_private_data {