[PATCH] md: raid10 read-error handling - resync and read-only

author NeilBrown <neilb@suse.de>

Fri, 6 Jan 2006 08:20:29 +0000 (00:20 -0800)

committer Linus Torvalds <torvalds@g5.osdl.org>

Fri, 6 Jan 2006 16:34:05 +0000 (08:34 -0800)
author NeilBrown <neilb@suse.de>
Fri, 6 Jan 2006 08:20:29 +0000 (00:20 -0800)
committer Linus Torvalds <torvalds@g5.osdl.org>
Fri, 6 Jan 2006 16:34:05 +0000 (08:34 -0800)
diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c

index 64bb4ddc6798232f1cda5cfbf87fa6e7c6d3792f..3f8df2ecbae31cdc335c525f8099de4e2bb2e158 100644 (file)
--- a/drivers/md/raid10.c
+++ b/drivers/md/raid10.c
@@ -172,7 +172,7 @@ static void put_all_bios(conf_t *conf, r10bio_t *r10_bio)
  
         for (i = 0; i < conf->copies; i++) {
                 struct bio **bio = & r10_bio->devs[i].bio;
-               if (*bio)
+               if (*bio && *bio != IO_BLOCKED)
                         bio_put(*bio);
                 *bio = NULL;
         }
@@ -500,6 +500,7 @@ static int read_balance(conf_t *conf, r10bio_t *r10_bio)
                 disk = r10_bio->devs[slot].devnum;
  
                 while ((rdev = rcu_dereference(conf->mirrors[disk].rdev)) == NULL ||
+                      r10_bio->devs[slot].bio == IO_BLOCKED ||
                        !test_bit(In_sync, &rdev->flags)) {
                         slot++;
                         if (slot == conf->copies) {
@@ -517,6 +518,7 @@ static int read_balance(conf_t *conf, r10bio_t *r10_bio)
         slot = 0;
         disk = r10_bio->devs[slot].devnum;
         while ((rdev=rcu_dereference(conf->mirrors[disk].rdev)) == NULL ||
+              r10_bio->devs[slot].bio == IO_BLOCKED ||
                !test_bit(In_sync, &rdev->flags)) {
                 slot ++;
                 if (slot == conf->copies) {
@@ -537,6 +539,7 @@ static int read_balance(conf_t *conf, r10bio_t *r10_bio)
  
  
                 if ((rdev=rcu_dereference(conf->mirrors[ndisk].rdev)) == NULL ||
+                   r10_bio->devs[nslot].bio == IO_BLOCKED ||
                     !test_bit(In_sync, &rdev->flags))
                         continue;
  
@@ -1104,7 +1107,6 @@ abort:
  
  static int end_sync_read(struct bio *bio, unsigned int bytes_done, int error)
  {
-       int uptodate = test_bit(BIO_UPTODATE, &bio->bi_flags);
         r10bio_t * r10_bio = (r10bio_t *)(bio->bi_private);
         conf_t *conf = mddev_to_conf(r10_bio->mddev);
         int i,d;
@@ -1119,7 +1121,10 @@ static int end_sync_read(struct bio *bio, unsigned int bytes_done, int error)
                 BUG();
         update_head_pos(i, r10_bio);
         d = r10_bio->devs[i].devnum;
-       if (!uptodate)
+
+       if (test_bit(BIO_UPTODATE, &bio->bi_flags))
+               set_bit(R10BIO_Uptodate, &r10_bio->state);
+       else if (!test_bit(MD_RECOVERY_SYNC, &conf->mddev->recovery))
                 md_error(r10_bio->mddev,
                          conf->mirrors[d].rdev);
  
@@ -1209,25 +1214,30 @@ static void sync_request_write(mddev_t *mddev, r10bio_t *r10_bio)
         fbio = r10_bio->devs[i].bio;
  
         /* now find blocks with errors */
-       for (i=first+1 ; i < conf->copies ; i++) {
-               int vcnt, j, d;
+       for (i=0 ; i < conf->copies ; i++) {
+               int  j, d;
+               int vcnt = r10_bio->sectors >> (PAGE_SHIFT-9);
  
-               if (!test_bit(BIO_UPTODATE, &r10_bio->devs[i].bio->bi_flags))
-                       continue;
-               /* We know that the bi_io_vec layout is the same for
-                * both 'first' and 'i', so we just compare them.
-                * All vec entries are PAGE_SIZE;
-                */
                 tbio = r10_bio->devs[i].bio;
-               vcnt = r10_bio->sectors >> (PAGE_SHIFT-9);
-               for (j = 0; j < vcnt; j++)
-                       if (memcmp(page_address(fbio->bi_io_vec[j].bv_page),
-                                  page_address(tbio->bi_io_vec[j].bv_page),
-                                  PAGE_SIZE))
-                               break;
-               if (j == vcnt)
+
+               if (tbio->bi_end_io != end_sync_read)
+                       continue;
+               if (i == first)
                         continue;
-               mddev->resync_mismatches += r10_bio->sectors;
+               if (test_bit(BIO_UPTODATE, &r10_bio->devs[i].bio->bi_flags)) {
+                       /* We know that the bi_io_vec layout is the same for
+                        * both 'first' and 'i', so we just compare them.
+                        * All vec entries are PAGE_SIZE;
+                        */
+                       for (j = 0; j < vcnt; j++)
+                               if (memcmp(page_address(fbio->bi_io_vec[j].bv_page),
+                                          page_address(tbio->bi_io_vec[j].bv_page),
+                                          PAGE_SIZE))
+                                       break;
+                       if (j == vcnt)
+                               continue;
+                       mddev->resync_mismatches += r10_bio->sectors;
+               }
                 if (test_bit(MD_RECOVERY_CHECK, &mddev->recovery))
                         /* Don't fix anything. */
                         continue;
@@ -1308,7 +1318,10 @@ static void recovery_request_write(mddev_t *mddev, r10bio_t *r10_bio)
  
         atomic_inc(&conf->mirrors[d].rdev->nr_pending);
         md_sync_acct(conf->mirrors[d].rdev->bdev, wbio->bi_size >> 9);
-       generic_make_request(wbio);
+       if (test_bit(R10BIO_Uptodate, &r10_bio->state))
+               generic_make_request(wbio);
+       else
+               bio_endio(wbio, wbio->bi_size, -EIO);
  }
  
  
@@ -1445,7 +1458,8 @@ static void raid10d(mddev_t *mddev)
                         unfreeze_array(conf);
  
                         bio = r10_bio->devs[r10_bio->read_slot].bio;
-                       r10_bio->devs[r10_bio->read_slot].bio = NULL;
+                       r10_bio->devs[r10_bio->read_slot].bio =
+                               mddev->ro ? IO_BLOCKED : NULL;
                         bio_put(bio);
                         mirror = read_balance(conf, r10_bio);
                         if (mirror == -1) {
diff --git a/include/linux/raid/raid10.h b/include/linux/raid/raid10.h

index dfa528385e3fe9654a47207bf9f661528c9e0ac6..b1103298a8c2cb043cd7c56d4a32b694463379e7 100644 (file)
--- a/include/linux/raid/raid10.h
+++ b/include/linux/raid/raid10.h
@@ -104,6 +104,13 @@ struct r10bio_s {
         } devs[0];
  };
  
+/* When we get a read error on a read-only array, we redirect to another
+ * device without failing the first device or trying to over-write to
+ * correct the read error.  To keep track of bad blocks on a per-bio
+ * level, we store IO_BLOCKED in the appropriate devs[].bio pointer.
+ */
+#define IO_BLOCKED ((struct bio*)1)
+
  /* bits for r10bio.state */
  #define        R10BIO_Uptodate 0
  #define        R10BIO_IsSync   1
author	NeilBrown <neilb@suse.de>
	Fri, 6 Jan 2006 08:20:29 +0000 (00:20 -0800)
committer	Linus Torvalds <torvalds@g5.osdl.org>
	Fri, 6 Jan 2006 16:34:05 +0000 (08:34 -0800)
drivers/md/raid10.c		patch | blob | history
include/linux/raid/raid10.h		patch | blob | history