This source file includes following definitions.
- find_gendisk
- partition_name
- set_ra
- md_ioctl
- md_open
- md_release
- remap_request
- do_md_request
- make_md_request
- md_geninit
- get_md_status
- md_valid_device
- md_can_reemit
- register_md_personality
- unregister_md_personality
- md_init
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20 #include <linux/config.h>
21 #include <linux/module.h>
22 #include <linux/version.h>
23 #include <linux/malloc.h>
24 #include <linux/mm.h>
25 #include <linux/md.h>
26 #include <linux/hdreg.h>
27 #include <linux/stat.h>
28 #include <linux/fs.h>
29 #include <linux/proc_fs.h>
30 #include <linux/blkdev.h>
31 #include <linux/genhd.h>
32 #include <errno.h>
33
#define MAJOR_NR MD_MAJOR
#define MD_DRIVER

#include <linux/blk.h>

/* Dummy exported flags: their presence in the symbol table signals to
   other modules that raid5/raid1 support was compiled in. */
#ifdef CONFIG_MD_SUPPORT_RAID5
int support_for_raid5;

#endif

#ifdef CONFIG_MD_SUPPORT_RAID1
int support_for_raid1;

#endif

/* Per-unit partition entry and block size (one md array per minor). */
static struct hd_struct md_hd_struct[MAX_MD_DEV];
static int md_blocksizes[MAX_MD_DEV];

/* Size of each md array in kB; exported to the personality modules. */
int md_size[MAX_MD_DEV]={0, };

static void md_geninit (struct gendisk *);

/* gendisk entry for the md major (no sub-partitions per unit —
   presumably the 0/1 fields are minor-shift / max-partitions; confirm
   against struct gendisk of this kernel version). */
static struct gendisk md_gendisk=
{
  MD_MAJOR,	/* major number */
  "md",		/* device name prefix */
  0,
  1,
  MAX_MD_DEV,	/* maximum number of units */
  md_geninit,	/* init routine */
  md_hd_struct,	/* partition table */
  md_size,	/* unit sizes */
  MAX_MD_DEV,
  NULL,
  NULL		/* next gendisk in the chain */
};

/* Registered personalities (linear, raid0, ...), indexed by
   personality number >> PERSONALITY_SHIFT. */
static struct md_personality *pers[MAX_PERSONALITY]={NULL, };

/* Component devices backing each array, and per-array state. */
struct real_dev devices[MAX_MD_DEV][MAX_REAL];
struct md_dev md_dev[MAX_MD_DEV];
75
76 static struct gendisk *find_gendisk (kdev_t dev)
77 {
78 struct gendisk *tmp=gendisk_head;
79
80 while (tmp != NULL)
81 {
82 if (tmp->major==MAJOR(dev))
83 return (tmp);
84
85 tmp=tmp->next;
86 }
87
88 return (NULL);
89 }
90
91
92
93 char *partition_name (kdev_t dev)
94 {
95 static char name[10];
96
97 struct gendisk *hd=find_gendisk (dev);
98
99 if (!hd)
100 {
101 printk ("No gendisk entry for dev %04x\n", dev);
102 sprintf (name, "dev %04x", dev);
103 return (name);
104 }
105
106 return disk_name (hd, MINOR(dev), name);
107 }
108
109
110 static void set_ra (void)
111 {
112 int i, j, minra=INT_MAX;
113
114 for (i=0; i<MAX_MD_DEV; i++)
115 {
116 if (!md_dev[i].pers)
117 continue;
118
119 for (j=0; j<md_dev[i].nb_dev; j++)
120 if (read_ahead[MAJOR(devices[i][j].dev)]<minra)
121 minra=read_ahead[MAJOR(devices[i][j].dev)];
122 }
123
124 read_ahead[MD_MAJOR]=minra;
125 }
126
127
/*
 * ioctl handler for /dev/mdX: device registration, array start/stop,
 * component enable/disable, and the generic block-device ioctls.
 * Requires superuser.  Minors with bit 7 set are routed to the ioctl
 * hook of the corresponding personality, if registered.
 */
static int md_ioctl (struct inode *inode, struct file *file,
		     unsigned int cmd, unsigned long arg)
{
  int minor, index, err, current_ra;
  struct gendisk *gen_real;
  struct hd_geometry *loc = (struct hd_geometry *) arg;
  kdev_t dev;

  if (!suser())
    return -EACCES;

  /* Personality-private ioctls: minor 0x80+n selects personality n. */
  if (((minor=MINOR(inode->i_rdev)) & 0x80) &&
      (minor & 0x7f) < MAX_PERSONALITY &&
      pers[minor & 0x7f] &&
      pers[minor & 0x7f]->ioctl)
    return (pers[minor & 0x7f]->ioctl (inode, file, cmd, arg));

  if (minor >= MAX_MD_DEV)
    return -EINVAL;

  switch (cmd)
  {
    case REGISTER_DEV:
      /* Append one component device to a not-yet-started array. */
      dev=to_kdev_t ((dev_t) arg);
      if (MAJOR(dev)==MD_MAJOR || md_dev[minor].nb_dev==MAX_REAL)
	return -EINVAL;

      /* Refuse mounted components and already-running arrays. */
      if (!fs_may_mount (dev) || md_dev[minor].pers)
	return -EBUSY;

      if (!(gen_real=find_gendisk (dev)))
	return -ENOENT;

      index=md_dev[minor].nb_dev++;
      devices[minor][index].dev=dev;

      /* Hold an inode for the component so it stays referenced while
	 part of the array. */
      devices[minor][index].inode=get_empty_inode ();
      devices[minor][index].inode->i_dev=dev;

      insert_inode_hash (devices[minor][index].inode);

      /* Sizes are in kB: round the component size down to a page
	 multiple; components are laid out back to back via offset. */
      devices[minor][index].size=gen_real->sizes[MINOR(dev)] & ~((PAGE_SIZE >> 10)-1);
      devices[minor][index].offset=index ?
	(devices[minor][index-1].offset + devices[minor][index-1].size) : 0;

      if (!index)
	md_size[minor]=devices[minor][index].size;
      else
	md_size[minor]+=devices[minor][index].size;

      printk("REGISTER_DEV %s to md%x done\n", partition_name(dev), minor);
      break;

    case START_MD:
      /* Activate the array; arg carries the personality and chunk
	 size encoded in the "repartition" word. */
      if (!md_dev[minor].nb_dev)
	return -EINVAL;

      if (md_dev[minor].pers)
	return -EBUSY;

      md_dev[minor].repartition=(int) arg;

      if ((index=PERSONALITY(md_dev+minor) >> (PERSONALITY_SHIFT))
	  >= MAX_PERSONALITY ||
	  !pers[index])
	return -EINVAL;

      md_dev[minor].pers=pers[index];

      /* Let the personality validate and start the array; roll back
	 on failure so the array stays stoppable/restartable. */
      if ((err=md_dev[minor].pers->run (minor, md_dev+minor)))
      {
	md_dev[minor].pers=NULL;
	return (err);
      }

      /* md_size is in kB; nr_sects is in 512-byte sectors. */
      md_hd_struct[minor].start_sect=0;
      md_hd_struct[minor].nr_sects=md_size[minor]<<1;

      /* Lower the md read-ahead to the smallest component value and
	 reset the fault state of every component. */
      current_ra=read_ahead[MD_MAJOR];

      for (index=0; index<md_dev[minor].nb_dev; index++)
      {
	if (current_ra>read_ahead[MAJOR(devices[minor][index].dev)])
	  current_ra=read_ahead[MAJOR(devices[minor][index].dev)];

	devices[minor][index].fault_count=0;
	devices[minor][index].invalid=VALID;
      }

      read_ahead[MD_MAJOR]=current_ra;

      printk ("START_DEV md%x %s\n", minor, md_dev[minor].pers->name);
      break;

    case STOP_MD:
      /* Refuse while someone else still holds the device open. */
      if (inode->i_count>1 || md_dev[minor].busy>1)
      {
	printk ("STOP_MD md%x failed : i_count=%d, busy=%d\n", minor, inode->i_count, md_dev[minor].busy);
	return -EBUSY;
      }

      if (md_dev[minor].pers)
      {
	/* Flush and drop cached buffers before stopping. */
	fsync_dev (inode->i_rdev);
	invalidate_buffers (inode->i_rdev);
	md_dev[minor].pers->stop (minor, md_dev+minor);
      }

      /* Release the component inodes taken in REGISTER_DEV. */
      for (index=0; index<md_dev[minor].nb_dev; index++)
	clear_inode (devices[minor][index].inode);

      md_dev[minor].nb_dev=md_size[minor]=0;
      md_dev[minor].pers=NULL;

      set_ra ();	/* recompute read-ahead over remaining arrays */

      printk ("STOP_DEV md%x\n", minor);
      break;

#if defined(CONFIG_MD_SUPPORT_RAID1) || defined(CONFIG_MD_SUPPORT_RAID5)
    case MD_INVALID:
      /* Administratively disable one component. */
      dev=to_kdev_t ((dev_t) arg);
      if (!(err=md_valid_device (minor, dev, INVALID_ALWAYS)))
	printk ("md%d : %s disabled\n", minor, partition_name (dev));

      return (err);

    case MD_VALID:
      /* Re-enable a previously disabled component. */
      dev=to_kdev_t ((dev_t) arg);
      if (!(err=md_valid_device (minor, dev, VALID)))
	printk ("md%d : %s enabled\n", minor, partition_name (dev));

      return (err);
#endif

    case BLKGETSIZE:	/* return array size in 512-byte sectors */
      if (!arg) return -EINVAL;
      err=verify_area (VERIFY_WRITE, (long *) arg, sizeof(long));
      if (err)
	return err;
      put_user (md_hd_struct[MINOR(inode->i_rdev)].nr_sects, (long *) arg);
      break;

    case BLKFLSBUF:	/* flush and invalidate cached buffers */
      fsync_dev (inode->i_rdev);
      invalidate_buffers (inode->i_rdev);
      break;

    case BLKRASET:	/* set read-ahead (at most 0xff sectors) */
      if (arg > 0xff)
	return -EINVAL;
      read_ahead[MAJOR(inode->i_rdev)] = arg;
      return 0;

    case BLKRAGET:	/* return the current read-ahead */
      if (!arg) return -EINVAL;
      err=verify_area (VERIFY_WRITE, (long *) arg, sizeof(long));
      if (err)
	return err;
      put_user (read_ahead[MAJOR(inode->i_rdev)], (long *) arg);
      break;

    case HDIO_GETGEO:
      /* Report a fake geometry: 2 heads, 4 sectors per track. */
      if (!loc) return -EINVAL;
      err = verify_area(VERIFY_WRITE, loc, sizeof(*loc));
      if (err)
	return err;
      put_user (2, (char *) &loc->heads);
      put_user (4, (char *) &loc->sectors);
      put_user (md_hd_struct[minor].nr_sects/8, (short *) &loc->cylinders);
      put_user (md_hd_struct[MINOR(inode->i_rdev)].start_sect,
		(long *) &loc->start);
      break;

    RO_IOCTLS(inode->i_rdev,arg);	/* generic read-only ioctls */

    default:
      printk ("Unknown md_ioctl %d\n", cmd);
      return -EINVAL;
  }

  return (0);
}
328
329
330 static int md_open (struct inode *inode, struct file *file)
331 {
332 int minor=MINOR(inode->i_rdev);
333
334 md_dev[minor].busy++;
335 return (0);
336 }
337
338
339 static void md_release (struct inode *inode, struct file *file)
340 {
341 int minor=MINOR(inode->i_rdev);
342
343 sync_dev (inode->i_rdev);
344 md_dev[minor].busy--;
345 }
346
347
/* Block-device file operations for /dev/mdX; unfilled slots use the
   kernel defaults. */
static struct file_operations md_fops=
{
  NULL,			/* lseek - default */
  block_read,		/* read */
  block_write,		/* write */
  NULL,			/* readdir */
  NULL,			/* select */
  md_ioctl,		/* ioctl */
  NULL,			/* mmap */
  md_open,		/* open */
  md_release,		/* release */
  block_fsync		/* fsync */
};
361
362
363 static inline int remap_request (int minor, struct request *req)
364 {
365 if (!md_dev[minor].pers)
366 {
367 printk ("Oops ! md%d not running, giving up !\n", minor);
368 return -1;
369 }
370
371 return (md_dev[minor].pers->map(minor, md_dev+minor, req));
372 }
373
374 static void do_md_request (void)
375 {
376 int minor;
377 struct request *req;
378
379 while (1)
380 {
381 #ifdef MD_COUNT_SIZE
382 int reqsize, chunksize;
383 #endif
384
385 cli ();
386 req = blk_dev[MD_MAJOR].current_request;
387 if (!req || (req->rq_status == RQ_INACTIVE))
388 {
389 sti ();
390 return;
391 }
392
393 #ifdef MD_COUNT_SIZE
394 reqsize=req->nr_sectors>>1;
395 chunksize=1 << FACTOR_SHIFT(FACTOR(md_dev+MINOR(req->rq_dev)));
396 if (reqsize==chunksize) (md_dev+MINOR(req->rq_dev))->equal_count++;
397 if (reqsize<chunksize) (md_dev+MINOR(req->rq_dev))->smallest_count++;
398 if (reqsize>chunksize) (md_dev+MINOR(req->rq_dev))->biggest_count++;
399 #endif
400
401 blk_dev[MD_MAJOR].current_request = req->next;
402 sti ();
403
404 minor = MINOR(req->rq_dev);
405 if ((MAJOR(req->rq_dev) != MD_MAJOR) || (minor >= MAX_REAL))
406 {
407 printk("md: bad device number: 0x%04x\n", req->rq_dev);
408 end_request(0, req);
409 continue;
410 }
411
412 switch (remap_request (minor, req))
413 {
414 case REDIRECTED_BHREQ:
415 req->rq_status=RQ_INACTIVE;
416 wake_up (&wait_for_request);
417 break;
418
419 case REDIRECTED_REQ:
420 break;
421
422 case REDIRECT_FAILED:
423 end_request (0, req);
424 break;
425
426 default:
427 printk ("remap_request returned strange value !\n");
428 }
429 }
430 }
431
extern struct semaphore request_lock;

/*
 * Issue a batch of n pending requests (prepared by a personality) to
 * the real component devices.  Each pending entry is first merged
 * into an existing inactive request on the target device when it is
 * contiguous (back or front merge); otherwise a fresh request is
 * allocated and queued.  request_lock serialises concurrent callers;
 * cli()/sti() protects the per-device queues.
 */
void make_md_request (struct request *pending, int n)
{
  int i, j, max_req, major=0, rw, found;
  kdev_t dev;
  struct buffer_head *bh;
  struct request *req;

  down (&request_lock);

  for (i=0; i<n; i++)
  {
    if (!pending[i].bh)
      continue;

    cli();

    found=0;
    rw=pending[i].cmd;
    bh=pending[i].bh;
    major=MAJOR(dev=pending[i].rq_dev);

    /* Writes may only consume two thirds of the request slots. */
    max_req = (rw == READ) ? NR_REQUEST : ((NR_REQUEST*2)/3);

    /* Merging is only attempted for these disk drivers. */
    if (( major == IDE0_MAJOR
	  || major == IDE1_MAJOR
	  || major == SCSI_DISK_MAJOR
	  || major == IDE2_MAJOR
	  || major == IDE3_MAJOR)
	&& (req = blk_dev[major].current_request))
    {
      /* Skip the head request on non-SCSI majors — presumably it may
	 already be active in the driver (TODO confirm). */
      if (major != SCSI_DISK_MAJOR)
	req = req->next;

      while (req && !found)
      {
	if (req->rq_status!=RQ_INACTIVE && req->rq_status!=RQ_ACTIVE)
	  printk ("Saw bad status request !\n");

	/* Back merge: pending starts exactly where req ends, same
	   device/command, no waiter, merged size under 245 sectors. */
	if (req->rq_dev == dev &&
	    !req->sem &&
	    req->cmd == rw &&
	    req->sector + req->nr_sectors == pending[i].sector &&
	    (req->nr_sectors + pending[i].nr_sectors) < 245)
	{
	  req->bhtail->b_reqnext = bh;
	  req->bhtail = pending[i].bhtail;
	  req->nr_sectors += pending[i].nr_sectors;
	  found=1;
	  continue;
	}

	/* Front merge: pending ends exactly where req starts; the
	   request head must be rewritten to the new first buffer. */
	if (!found &&
	    req->rq_dev == dev &&
	    !req->sem &&
	    req->cmd == rw &&
	    req->sector - pending[i].nr_sectors == pending[i].sector &&
	    (req->nr_sectors + pending[i].nr_sectors) < 245)
	{
	  req->nr_sectors += pending[i].nr_sectors;
	  bh->b_reqnext = req->bh;
	  req->buffer = bh->b_data;
	  req->current_nr_sectors = bh->b_size >> 9;
	  req->sector = pending[i].sector;
	  req->bh = bh;
	  found=1;
	  continue;
	}

	req = req->next;
      }
    }

    if (found)
      continue;

    /* No merge possible: allocate a request slot (may sleep, so the
       lock is dropped around it) and queue a brand-new request. */
    up (&request_lock);
    req=get_md_request (max_req, dev);

    /* Build the request from the pending entry. */
    req->cmd = rw;
    req->errors = 0;
#if defined (CONFIG_MD_SUPPORT_RAID1)
    req->shared_count = 0;
#endif
    req->sector = pending[i].sector;
    req->nr_sectors = pending[i].nr_sectors;
    req->current_nr_sectors = bh->b_size >> 9;
    req->buffer = bh->b_data;
    req->sem = NULL;
    req->bh = bh;
    req->bhtail = pending[i].bhtail;
    req->next = NULL;

    add_request (blk_dev + MAJOR(dev), req);
    down (&request_lock);
  }

  up (&request_lock);
  /* Mark every slot consumed so the caller can reuse the array. */
  for (j=0; j<n; j++)
  {
    if (!pending[j].bh)
      continue;

    pending[j].bh=NULL;
  }

  sti ();
}
546
547
/* Symbols exported to the personality modules (linear, raid0, raid1,
   raid5) so they can be built as loadable modules. */
static struct symbol_table md_symbol_table=
{
#include <linux/symtab_begin.h>

  X(devices),
  X(md_size),
  X(add_request),
  X(make_md_request),

#ifdef CONFIG_MD_SUPPORT_RAID1
  X(support_for_raid1),
#endif

#ifdef CONFIG_MD_SUPPORT_RAID5
  X(support_for_raid5),
#endif

  X(register_md_personality),
  X(unregister_md_personality),
  X(partition_name),

#if defined(CONFIG_MD_SUPPORT_RAID1) || defined(CONFIG_MD_SUPPORT_RAID5)
  X(md_valid_device),
  X(md_can_reemit),
#endif

#include <linux/symtab_end.h>
};
576
577
/*
 * gendisk init hook: reset per-unit state, publish the block sizes,
 * export the md symbol table and register /proc/mdstat.
 */
static void md_geninit (struct gendisk *gdisk)
{
  int i;

  for(i=0;i<MAX_MD_DEV;i++)
  {
    md_blocksizes[i] = 1024;
    md_gendisk.part[i].start_sect=-1;	/* presumably marks the unit inactive — confirm */
    md_dev[i].pers=NULL;
#ifdef MD_COUNT_SIZES
    md_dev[i].smallest_count=md_dev[i].biggest_count=md_dev[i].equal_count=0;
#endif
  }

  blksize_size[MAJOR_NR] = md_blocksizes;
  register_symtab (&md_symbol_table);

  /* Register /proc/mdstat; its contents come from get_md_status(). */
  proc_register(&proc_root,
		&(struct proc_dir_entry)
		{
		  PROC_MD, 6, "mdstat",
		  S_IFREG | S_IRUGO, 1, 0, 0,
		});
}
602
603
/*
 * Produce the contents of /proc/mdstat into page.
 * Returns the number of bytes written.
 */
int get_md_status (char *page)
{
  int sz=0, i, j;

  sz+=sprintf( page+sz, "Personalities : ");
  for (i=0; i<MAX_PERSONALITY; i++)
    if (pers[i])
      sz+=sprintf (page+sz, "[%d %s] ", i, pers[i]->name);

  /* Overwrite the trailing space with a newline. */
  page[sz-1]='\n';

  sz+=sprintf (page+sz, "read_ahead ");
  if (read_ahead[MD_MAJOR]==INT_MAX)	/* INT_MAX means "never set" */
    sz+=sprintf (page+sz, "not set\n");
  else
    sz+=sprintf (page+sz, "%d sectors\n", read_ahead[MD_MAJOR]);

  for (i=0; i<MAX_MD_DEV; i++)
  {
    sz+=sprintf (page+sz, "md%d : %sactive", i, md_dev[i].pers ? "" : "in");

    if (md_dev[i].pers)
      sz+=sprintf (page+sz, " %s", md_dev[i].pers->name);

    /* List the components; invalid ones appear in parentheses. */
    for (j=0; j<md_dev[i].nb_dev; j++)
      sz+=sprintf (page+sz, " %s%s%s",
		   (devices[i][j].invalid==VALID) ? "" : "(",
		   partition_name(devices[i][j].dev),
		   (devices[i][j].invalid==VALID) ? "" : ")");

    if (md_dev[i].nb_dev)
      sz+=sprintf (page+sz, " %d blocks", md_size[i]);

    if (!md_dev[i].pers)
    {
      sz+=sprintf (page+sz, "\n");
      continue;
    }

    if (md_dev[i].pers->max_invalid_dev)
      sz+=sprintf (page+sz, " maxfault=%ld", MAX_FAULT(md_dev+i));

    /* Chunk size is meaningless for the linear personality. */
    if (md_dev[i].pers != pers[(LINEAR>>PERSONALITY_SHIFT)])
    {
      sz+=sprintf (page+sz, " %dk chunks", 1<<FACTOR_SHIFT(FACTOR(md_dev+i)));
#ifdef MD_COUNT_SIZES
      /* (smaller/equal/bigger than chunk) request-size statistics */
      sz+=sprintf (page+sz, " (%d/%d/%d)",
		   md_dev[i].smallest_count,
		   md_dev[i].equal_count,
		   md_dev[i].biggest_count);
#endif
    }
    sz+=sprintf (page+sz, "\n");
    sz+=md_dev[i].pers->status (page+sz, i, md_dev+i);
  }

  return (sz);
}
662
663 #if defined(CONFIG_MD_SUPPORT_RAID1) || defined(CONFIG_MD_SUPPORT_RAID5)
664
665 int md_valid_device (int minor, kdev_t dev, int mode)
666 {
667 int i;
668
669 for (i=0; i<md_dev[minor].nb_dev; i++)
670 if (devices[minor][i].dev==dev)
671 break;
672
673 if (i>md_dev[minor].nb_dev)
674 {
675 printk ("Oops, dev %04x not found in md_valid_device\n", dev);
676 return -EINVAL;
677 }
678
679 switch (mode)
680 {
681 case VALID:
682
683
684
685 if (devices[minor][i].invalid==INVALID_ALWAYS)
686 {
687 devices[minor][i].fault_count=0;
688 if (md_dev[minor].invalid_dev_count)
689 md_dev[minor].invalid_dev_count--;
690 }
691 break;
692
693 case INVALID:
694 if (devices[minor][i].invalid != VALID )
695 return 0;
696
697 if (++devices[minor][i].fault_count > MAX_FAULT(md_dev+minor) &&
698 MAX_FAULT(md_dev+minor)!=0xFF)
699 {
700
701
702
703 mode=INVALID_ALWAYS;
704 md_dev[minor].invalid_dev_count++;
705 }
706 else
707
708
709
710
711
712 if (md_dev[minor].invalid_dev_count<=md_dev[minor].pers->max_invalid_dev)
713 mode=INVALID_NEXT;
714 else
715 mode=VALID;
716 break;
717
718 case INVALID_ALWAYS:
719 md_dev[minor].invalid_dev_count++;
720 }
721
722 devices[minor][i].invalid=mode;
723 return 0;
724 }
725
726
727 int md_can_reemit (int minor)
728 {
729
730
731
732
733
734
735
736 if (!md_dev[minor].pers)
737 return (0);
738
739 return(md_dev[minor].pers->max_invalid_dev &&
740 ((md_dev[minor].pers->max_invalid_dev==-1) ?
741 1 :
742 md_dev[minor].invalid_dev_count<=md_dev[minor].pers->max_invalid_dev));
743 }
744
745 #endif
746
747 int register_md_personality (int p_num, struct md_personality *p)
748 {
749 int i=(p_num >> PERSONALITY_SHIFT);
750
751 if (i >= MAX_PERSONALITY)
752 return -EINVAL;
753
754 if (pers[i])
755 return -EBUSY;
756
757 pers[i]=p;
758 printk ("%s personality registered\n", p->name);
759 return 0;
760 }
761
762 int unregister_md_personality (int p_num)
763 {
764 int i=(p_num >> PERSONALITY_SHIFT);
765
766 if (i >= MAX_PERSONALITY)
767 return -EINVAL;
768
769 printk ("%s personality unregistered\n", pers[i]->name);
770 pers[i]=NULL;
771 return 0;
772 }
773
/* Personality initialisers, defined in their respective source files. */
void linear_init (void);
void raid0_init (void);
void raid1_init (void);
void raid5_init (void);

/*
 * Driver entry point: register the md block major, install the
 * request function and gendisk entry, and initialise every
 * compiled-in personality.  Returns 0 on success, -1 when the major
 * number cannot be obtained.
 */
int md_init (void)
{
  printk ("md driver %s MAX_MD_DEV=%d, MAX_REAL=%d\n", MD_VERSION, MAX_MD_DEV, MAX_REAL);

  if (register_blkdev (MD_MAJOR, "md", &md_fops))
  {
    printk ("Unable to get major %d for md\n", MD_MAJOR);
    return (-1);
  }

  blk_dev[MD_MAJOR].request_fn=DEVICE_REQUEST;
  blk_dev[MD_MAJOR].current_request=NULL;
  read_ahead[MD_MAJOR]=INT_MAX;	/* "not set" until the first array starts; see set_ra() */
  md_gendisk.next=gendisk_head;

  gendisk_head=&md_gendisk;	/* put md at the head of the gendisk chain */

#ifdef CONFIG_MD_LINEAR
  linear_init ();
#endif
#ifdef CONFIG_MD_STRIPED
  raid0_init ();
#endif
#ifdef CONFIG_MD_RAID1
  raid1_init ();
#endif
#ifdef CONFIG_MD_RAID5
  raid5_init ();
#endif

  return (0);
}