This source file includes the following definitions:
- find_gendisk
- partition_name
- set_ra
- md_ioctl
- md_open
- md_release
- remap_request
- do_md_request
- make_md_request
- md_geninit
- get_md_status
- md_valid_device
- md_can_reemit
- register_md_personality
- unregister_md_personality
- md_init
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22 #include <linux/config.h>
23 #include <linux/module.h>
24 #include <linux/version.h>
25 #include <linux/malloc.h>
26 #include <linux/mm.h>
27 #include <linux/md.h>
28 #include <linux/hdreg.h>
29 #include <linux/stat.h>
30 #include <linux/fs.h>
31 #include <linux/proc_fs.h>
32 #include <linux/blkdev.h>
33 #include <linux/genhd.h>
34 #ifdef CONFIG_KERNELD
35 #include <linux/kerneld.h>
36 #endif
37 #include <linux/errno.h>
38
39 #define MAJOR_NR MD_MAJOR
40 #define MD_DRIVER
41
42 #include <linux/blk.h>
43
#ifdef CONFIG_MD_SUPPORT_RAID5
int support_for_raid5;		/* exported marker symbol: RAID-5 support compiled in */

#endif

#ifdef CONFIG_MD_SUPPORT_RAID1
int support_for_raid1;		/* exported marker symbol: RAID-1 support compiled in */

#endif

static struct hd_struct md_hd_struct[MAX_MD_DEV];	/* start_sect/nr_sects of each md device */
static int md_blocksizes[MAX_MD_DEV];			/* soft blocksize per md device (set in md_geninit) */

int md_size[MAX_MD_DEV]={0, };	/* size of each md device in kB; exported and used as gendisk 'sizes' */

static void md_geninit (struct gendisk *);

/* gendisk entry for the md driver; chained onto gendisk_head by md_init(). */
static struct gendisk md_gendisk=
{
  MD_MAJOR,	/* major number */
  "md",		/* device name */
  0,		/* NOTE(review): presumably the minor shift (no sub-partitions) -- confirm against struct gendisk */
  1,
  MAX_MD_DEV,
  md_geninit,	/* init function */
  md_hd_struct,	/* partition table ('part', indexed in md_geninit) */
  md_size,	/* per-device sizes in kB */
  MAX_MD_DEV,
  NULL,
  NULL		/* next gendisk in the chain (linked up by md_init) */
};

static struct md_personality *pers[MAX_PERSONALITY]={NULL, };	/* registered personalities, indexed by p_num >> PERSONALITY_SHIFT */

struct real_dev devices[MAX_MD_DEV][MAX_REAL];	/* real devices making up each array */
struct md_dev md_dev[MAX_MD_DEV];		/* per-array state */
80
81 static struct gendisk *find_gendisk (kdev_t dev)
82 {
83 struct gendisk *tmp=gendisk_head;
84
85 while (tmp != NULL)
86 {
87 if (tmp->major==MAJOR(dev))
88 return (tmp);
89
90 tmp=tmp->next;
91 }
92
93 return (NULL);
94 }
95
96
97
98 char *partition_name (kdev_t dev)
99 {
100 static char name[40];
101
102 struct gendisk *hd = find_gendisk (dev);
103
104 if (!hd)
105 {
106 printk ("No gendisk entry for dev %s\n", kdevname(dev));
107 sprintf (name, "dev %s", kdevname(dev));
108 return (name);
109 }
110
111 return disk_name (hd, MINOR(dev), name);
112 }
113
114
115 static void set_ra (void)
116 {
117 int i, j, minra=INT_MAX;
118
119 for (i=0; i<MAX_MD_DEV; i++)
120 {
121 if (!md_dev[i].pers)
122 continue;
123
124 for (j=0; j<md_dev[i].nb_dev; j++)
125 if (read_ahead[MAJOR(devices[i][j].dev)]<minra)
126 minra=read_ahead[MAJOR(devices[i][j].dev)];
127 }
128
129 read_ahead[MD_MAJOR]=minra;
130 }
131
132
/*
 * ioctl entry point for /dev/md*.  Superuser only.  Minors with bit 7
 * set are forwarded to the matching personality's ioctl handler; the
 * remaining minors handle array setup (REGISTER_DEV, START_MD,
 * STOP_MD, MD_VALID/MD_INVALID) plus the generic block ioctls.
 */
static int md_ioctl (struct inode *inode, struct file *file,
                     unsigned int cmd, unsigned long arg)
{
  int minor, index, err, current_ra;
  struct gendisk *gen_real;
  struct hd_geometry *loc = (struct hd_geometry *) arg;
  kdev_t dev;

  if (!suser())
    return -EACCES;

  /* Minors 0x80-0xff: route to the personality's own ioctl handler. */
  if (((minor=MINOR(inode->i_rdev)) & 0x80) &&
      (minor & 0x7f) < MAX_PERSONALITY &&
      pers[minor & 0x7f] &&
      pers[minor & 0x7f]->ioctl)
    return (pers[minor & 0x7f]->ioctl (inode, file, cmd, arg));

  if (minor >= MAX_MD_DEV)
    return -EINVAL;

  switch (cmd)
  {
    case REGISTER_DEV:
      /* Append the real device given in 'arg' to array md<minor>. */
      dev=to_kdev_t ((dev_t) arg);

      /* Refuse md-on-md stacking and arrays that are already full. */
      if (MAJOR(dev)==MD_MAJOR || md_dev[minor].nb_dev==MAX_REAL)
        return -EINVAL;

      /* The real device must not be mounted, and the array stopped. */
      if (!fs_may_mount (dev) || md_dev[minor].pers)
        return -EBUSY;

      if (!(gen_real=find_gendisk (dev)))
        return -ENOENT;

      index=md_dev[minor].nb_dev++;
      devices[minor][index].dev=dev;

      /* Pin an anonymous inode on the real device so it is marked in
	 use for as long as it belongs to the array. */
      devices[minor][index].inode=get_empty_inode ();
      devices[minor][index].inode->i_dev=dev;

      insert_inode_hash (devices[minor][index].inode);

      /* Round the usable size (kB, from the real driver's sizes[])
	 down to a page multiple, and place this device right after
	 the previous one. */
      devices[minor][index].size=gen_real->sizes[MINOR(dev)] & ~((PAGE_SIZE >> 10)-1);
      devices[minor][index].offset=index ?
        (devices[minor][index-1].offset + devices[minor][index-1].size) : 0;

      if (!index)
        md_size[minor]=devices[minor][index].size;
      else
        md_size[minor]+=devices[minor][index].size;

      printk("REGISTER_DEV %s to md%x done\n", partition_name(dev), minor);
      break;

    case START_MD:
      /* Start the array; 'arg' becomes 'repartition', from which the
	 PERSONALITY()/FACTOR() macros decode personality and chunks. */
      if (!md_dev[minor].nb_dev)
        return -EINVAL;

      if (md_dev[minor].pers)
        return -EBUSY;

      md_dev[minor].repartition=(int) arg;

      if ((index=PERSONALITY(md_dev+minor) >> (PERSONALITY_SHIFT))
          >= MAX_PERSONALITY)
        return -EINVAL;

      /* Personality not registered: optionally demand-load it. */
      if (!pers[index])
      {
#ifdef CONFIG_KERNELD
        char module_name[80];
        sprintf (module_name, "md-personality-%d", index);
        request_module (module_name);
        if (!pers[index])
#endif
          return -EINVAL;
      }

      md_dev[minor].pers=pers[index];

      if ((err=md_dev[minor].pers->run (minor, md_dev+minor)))
      {
        md_dev[minor].pers=NULL;	/* leave the array stopped on failure */
        return (err);
      }

      /* Array is up: publish its size (md_size is in kB, nr_sects in
	 512-byte sectors, hence the <<1). */
      md_hd_struct[minor].start_sect=0;
      md_hd_struct[minor].nr_sects=md_size[minor]<<1;

      /* Lower the md read-ahead to the minimum of the real devices',
	 and reset each device's fault state. */
      current_ra=read_ahead[MD_MAJOR];

      for (index=0; index<md_dev[minor].nb_dev; index++)
      {
        if (current_ra>read_ahead[MAJOR(devices[minor][index].dev)])
          current_ra=read_ahead[MAJOR(devices[minor][index].dev)];

        devices[minor][index].fault_count=0;
        devices[minor][index].invalid=VALID;
      }

      read_ahead[MD_MAJOR]=current_ra;

      printk ("START_DEV md%x %s\n", minor, md_dev[minor].pers->name);
      break;

    case STOP_MD:
      /* Refuse while the device node is open elsewhere or busy. */
      if (inode->i_count>1 || md_dev[minor].busy>1)
      {
        printk ("STOP_MD md%x failed : i_count=%d, busy=%d\n", minor, inode->i_count, md_dev[minor].busy);
        return -EBUSY;
      }

      if (md_dev[minor].pers)
      {
        /* Flush and drop cached buffers before stopping. */
        fsync_dev (inode->i_rdev);
        invalidate_buffers (inode->i_rdev);
        md_dev[minor].pers->stop (minor, md_dev+minor);
      }

      /* Release the inodes pinned by REGISTER_DEV. */
      for (index=0; index<md_dev[minor].nb_dev; index++)
        clear_inode (devices[minor][index].inode);

      md_dev[minor].nb_dev=md_size[minor]=0;
      md_dev[minor].pers=NULL;

      set_ra ();	/* recompute the global md read-ahead */

      printk ("STOP_DEV md%x\n", minor);
      break;

#if defined(CONFIG_MD_SUPPORT_RAID1) || defined(CONFIG_MD_SUPPORT_RAID5)
    case MD_INVALID:
      /* Manually mark a real device as permanently disabled. */
      dev=to_kdev_t ((dev_t) arg);
      if (!(err=md_valid_device (minor, dev, INVALID_ALWAYS)))
        printk ("md%d : %s disabled\n", minor, partition_name (dev));

      return (err);

    case MD_VALID:
      /* Manually re-enable a previously disabled real device. */
      dev=to_kdev_t ((dev_t) arg);
      if (!(err=md_valid_device (minor, dev, VALID)))
        printk ("md%d : %s enabled\n", minor, partition_name (dev));

      return (err);
#endif

    case BLKGETSIZE:	/* return device size in sectors */
      if (!arg) return -EINVAL;
      err=verify_area (VERIFY_WRITE, (long *) arg, sizeof(long));
      if (err)
        return err;
      put_user (md_hd_struct[MINOR(inode->i_rdev)].nr_sects, (long *) arg);
      break;

    case BLKFLSBUF:	/* flush and invalidate cached buffers */
      fsync_dev (inode->i_rdev);
      invalidate_buffers (inode->i_rdev);
      break;

    case BLKRASET:	/* set read-ahead (capped at 0xff sectors) */
      if (arg > 0xff)
        return -EINVAL;
      read_ahead[MAJOR(inode->i_rdev)] = arg;
      return 0;

    case BLKRAGET:	/* get current read-ahead */
      if (!arg) return -EINVAL;
      err=verify_area (VERIFY_WRITE, (long *) arg, sizeof(long));
      if (err)
        return err;
      put_user (read_ahead[MAJOR(inode->i_rdev)], (long *) arg);
      break;

    case HDIO_GETGEO:
      /* Report a fake geometry: 2 heads, 4 sectors/track. */
      if (!loc) return -EINVAL;
      err = verify_area(VERIFY_WRITE, loc, sizeof(*loc));
      if (err)
        return err;
      put_user (2, (char *) &loc->heads);
      put_user (4, (char *) &loc->sectors);
      put_user (md_hd_struct[minor].nr_sects/8, (short *) &loc->cylinders);
      put_user (md_hd_struct[MINOR(inode->i_rdev)].start_sect,
                (long *) &loc->start);
      break;

    RO_IOCTLS(inode->i_rdev,arg);	/* generic read-only ioctls */

    default:
      printk ("Unknown md_ioctl %d\n", cmd);
      return -EINVAL;
  }

  return (0);
}
343
344
345 static int md_open (struct inode *inode, struct file *file)
346 {
347 int minor=MINOR(inode->i_rdev);
348
349 md_dev[minor].busy++;
350 return (0);
351 }
352
353
354 static void md_release (struct inode *inode, struct file *file)
355 {
356 int minor=MINOR(inode->i_rdev);
357
358 sync_dev (inode->i_rdev);
359 md_dev[minor].busy--;
360 }
361
362
/* File operations for /dev/md* (slot names per the 2.0-era
   struct file_operations layout -- confirm against linux/fs.h). */
static struct file_operations md_fops=
{
  NULL,			/* lseek: default */
  block_read,		/* read */
  block_write,		/* write */
  NULL,			/* readdir */
  NULL,			/* select */
  md_ioctl,		/* ioctl */
  NULL,			/* mmap */
  md_open,		/* open */
  md_release,		/* release */
  block_fsync		/* fsync */
};
376
377
378 static inline int remap_request (int minor, struct request *req)
379 {
380 if (!md_dev[minor].pers)
381 {
382 printk ("Oops ! md%d not running, giving up !\n", minor);
383 return -1;
384 }
385
386 return (md_dev[minor].pers->map(minor, md_dev+minor, req));
387 }
388
389 static void do_md_request (void)
390 {
391 int minor;
392 long flags;
393 struct request *req;
394
395 while (1)
396 {
397 #ifdef MD_COUNT_SIZE
398 int reqsize, chunksize;
399 #endif
400
401 save_flags (flags);
402 cli ();
403 req = blk_dev[MD_MAJOR].current_request;
404 if (!req || (req->rq_status == RQ_INACTIVE))
405 {
406 restore_flags (flags);
407 return;
408 }
409
410 #ifdef MD_COUNT_SIZE
411 reqsize=req->nr_sectors>>1;
412 chunksize=1 << FACTOR_SHIFT(FACTOR(md_dev+MINOR(req->rq_dev)));
413 if (reqsize==chunksize) (md_dev+MINOR(req->rq_dev))->equal_count++;
414 if (reqsize<chunksize) (md_dev+MINOR(req->rq_dev))->smallest_count++;
415 if (reqsize>chunksize) (md_dev+MINOR(req->rq_dev))->biggest_count++;
416 #endif
417
418 blk_dev[MD_MAJOR].current_request = req->next;
419 restore_flags (flags);
420
421 minor = MINOR(req->rq_dev);
422 if ((MAJOR(req->rq_dev) != MD_MAJOR) || (minor >= MAX_REAL))
423 {
424 printk("md: bad device: %s\n", kdevname(req->rq_dev));
425 end_request(0, req);
426 continue;
427 }
428
429 switch (remap_request (minor, req))
430 {
431 case REDIRECTED_BHREQ:
432 req->rq_status=RQ_INACTIVE;
433 wake_up (&wait_for_request);
434 break;
435
436 case REDIRECTED_REQ:
437 break;
438
439 case REDIRECT_FAILED:
440 end_request (0, req);
441 break;
442
443 default:
444 printk ("remap_request returned strange value !\n");
445 }
446 }
447 }
448
449 extern struct semaphore request_lock;
450
/*
 * Issue the 'n' pending requests to the underlying real devices.  Each
 * pending slot is first tried as a merge (back or front) into an
 * existing request on the target driver's queue; only when no merge is
 * possible is a fresh struct request allocated and queued.
 *
 * Called with interrupts enabled; request_lock and cli() protect the
 * queue scans, and both are dropped around get_md_request(), which may
 * sleep.
 */
void make_md_request (struct request *pending, int n)
{
  int i, j, max_req, major=0, rw, found;
  kdev_t dev;
  struct buffer_head *bh;
  struct request *req;
  long flags;

  down (&request_lock);
  save_flags (flags);
  cli();

  for (i=0; i<n; i++)
  {
    if (!pending[i].bh)		/* empty slot */
      continue;

    found=0;
    rw=pending[i].cmd;
    bh=pending[i].bh;
    major=MAJOR(dev=pending[i].rq_dev);

    /* Reads may use the whole request pool; writes only 2/3 of it. */
    max_req = (rw == READ) ? NR_REQUEST : ((NR_REQUEST*2)/3);

    /* Only attempt merging on queues of known disk drivers. */
    if (( major == IDE0_MAJOR
	  || major == IDE1_MAJOR
	  || major == SCSI_DISK_MAJOR
	  || major == IDE2_MAJOR
	  || major == IDE3_MAJOR)
	&& (req = blk_dev[major].current_request))
    {
      /* For non-SCSI majors the head request is skipped --
	 NOTE(review): presumably because it is already being
	 serviced by the controller; confirm against ll_rw_blk. */
      if (major != SCSI_DISK_MAJOR)
	req = req->next;

      while (req && !found)
      {
	if (req->rq_status!=RQ_ACTIVE && &blk_dev[major].plug!=req)
	  printk ("Saw bad status request !\n");

	/* Back merge: the pending request starts exactly where 'req'
	   ends, the combined size stays under 245 sectors, and nobody
	   is sleeping on 'req' (req->sem). */
	if (req->rq_dev == dev &&
	    !req->sem &&
	    req->cmd == rw &&
	    req->sector + req->nr_sectors == pending[i].sector &&
	    (req->nr_sectors + pending[i].nr_sectors) < 245)
	{
	  req->bhtail->b_reqnext = bh;
	  req->bhtail = pending[i].bhtail;
	  req->nr_sectors += pending[i].nr_sectors;
	  found=1;
	  continue;
	}

	/* Front merge: the pending request ends exactly where 'req'
	   starts; 'req' is rewritten to begin at the new sector. */
	if (!found &&
	    req->rq_dev == dev &&
	    !req->sem &&
	    req->cmd == rw &&
	    req->sector - pending[i].nr_sectors == pending[i].sector &&
	    (req->nr_sectors + pending[i].nr_sectors) < 245)
	{
	  req->nr_sectors += pending[i].nr_sectors;
	  pending[i].bhtail->b_reqnext = req->bh;
	  req->buffer = bh->b_data;
	  req->current_nr_sectors = bh->b_size >> 9;
	  req->sector = pending[i].sector;
	  req->bh = bh;
	  found=1;
	  continue;
	}

	req = req->next;
      }
    }

    if (found)
      continue;

    /* No merge possible: allocate a request.  get_md_request() may
       sleep, so release the lock and re-enable interrupts first. */
    up (&request_lock);
    sti ();
    req=get_md_request (max_req, dev);

    /* Build the new request from the pending slot. */
    req->cmd = rw;
    req->errors = 0;
#if defined (CONFIG_MD_SUPPORT_RAID1)
    req->shared_count = 0;
#endif
    req->sector = pending[i].sector;
    req->nr_sectors = pending[i].nr_sectors;
    req->current_nr_sectors = bh->b_size >> 9;
    req->buffer = bh->b_data;
    req->sem = NULL;
    req->bh = bh;
    req->bhtail = pending[i].bhtail;
    req->next = NULL;

    add_request (blk_dev + MAJOR(dev), req);
    down (&request_lock);
    cli ();
  }

  up (&request_lock);
  restore_flags (flags);

  /* Mark every slot as consumed for the caller. */
  for (j=0; j<n; j++)
  {
    if (!pending[j].bh)
      continue;

    pending[j].bh=NULL;
  }
}
565
566
/*
 * Symbols exported to the personality modules (registered with
 * register_symtab() in md_geninit).
 */
static struct symbol_table md_symbol_table=
{
#include <linux/symtab_begin.h>

  X(devices),
  X(md_size),
  X(add_request),
  X(make_md_request),

#ifdef CONFIG_MD_SUPPORT_RAID1
  X(support_for_raid1),
#endif

#ifdef CONFIG_MD_SUPPORT_RAID5
  X(support_for_raid5),
#endif

  X(register_md_personality),
  X(unregister_md_personality),
  X(partition_name),

#if defined(CONFIG_MD_SUPPORT_RAID1) || defined(CONFIG_MD_SUPPORT_RAID5)
  X(md_valid_device),
  X(md_can_reemit),
#endif

#include <linux/symtab_end.h>
};
595
596
/*
 * gendisk init hook: set up the per-device tables, export our symbol
 * table, and register the /proc/mdstat entry.
 */
static void md_geninit (struct gendisk *gdisk)
{
  int i;

  for(i=0;i<MAX_MD_DEV;i++)
  {
    md_blocksizes[i] = 1024;	/* default soft blocksize */
    md_gendisk.part[i].start_sect=-1;	/* NOTE(review): presumably marks the entry as having no valid partition yet -- confirm */
    md_dev[i].pers=NULL;	/* no personality -> array stopped */
#ifdef MD_COUNT_SIZES
    md_dev[i].smallest_count=md_dev[i].biggest_count=md_dev[i].equal_count=0;
#endif
  }

  blksize_size[MAJOR_NR] = md_blocksizes;
  register_symtab (&md_symbol_table);

  /* Register /proc/mdstat (contents produced by get_md_status). */
  proc_register(&proc_root,
		&(struct proc_dir_entry)
		{
		  PROC_MD, 6, "mdstat",
		  S_IFREG | S_IRUGO, 1, 0, 0,
		});
}
621
622
/*
 * Produce the text of /proc/mdstat into 'page' and return the number
 * of characters written: registered personalities, the current md
 * read-ahead, then one line per md device (personality, member
 * devices, total size, chunking) plus the personality's own status.
 */
int get_md_status (char *page)
{
  int sz=0, i, j;

  sz+=sprintf( page+sz, "Personalities : ");
  for (i=0; i<MAX_PERSONALITY; i++)
    if (pers[i])
      sz+=sprintf (page+sz, "[%d %s] ", i, pers[i]->name);

  /* Replace the trailing space with a newline. */
  page[sz-1]='\n';

  sz+=sprintf (page+sz, "read_ahead ");
  if (read_ahead[MD_MAJOR]==INT_MAX)	/* never lowered: no array ran yet */
    sz+=sprintf (page+sz, "not set\n");
  else
    sz+=sprintf (page+sz, "%d sectors\n", read_ahead[MD_MAJOR]);

  for (i=0; i<MAX_MD_DEV; i++)
  {
    sz+=sprintf (page+sz, "md%d : %sactive", i, md_dev[i].pers ? "" : "in");

    if (md_dev[i].pers)
      sz+=sprintf (page+sz, " %s", md_dev[i].pers->name);

    /* Member list; invalid devices are shown in parentheses. */
    for (j=0; j<md_dev[i].nb_dev; j++)
      sz+=sprintf (page+sz, " %s%s%s",
		   (devices[i][j].invalid==VALID) ? "" : "(",
		   partition_name(devices[i][j].dev),
		   (devices[i][j].invalid==VALID) ? "" : ")");

    if (md_dev[i].nb_dev)
      sz+=sprintf (page+sz, " %d blocks", md_size[i]);

    if (!md_dev[i].pers)	/* stopped array: nothing more to report */
    {
      sz+=sprintf (page+sz, "\n");
      continue;
    }

    if (md_dev[i].pers->max_invalid_dev)
      sz+=sprintf (page+sz, " maxfault=%ld", MAX_FAULT(md_dev+i));

    /* Chunking info is meaningless for the linear personality. */
    if (md_dev[i].pers != pers[(LINEAR>>PERSONALITY_SHIFT)])
    {
      sz+=sprintf (page+sz, " %dk chunks", 1<<FACTOR_SHIFT(FACTOR(md_dev+i)));
#ifdef MD_COUNT_SIZES
      sz+=sprintf (page+sz, " (%d/%d/%d)",
		   md_dev[i].smallest_count,
		   md_dev[i].equal_count,
		   md_dev[i].biggest_count);
#endif
    }
    sz+=sprintf (page+sz, "\n");
    sz+=md_dev[i].pers->status (page+sz, i, md_dev+i);
  }

  return (sz);
}
681
682 #if defined(CONFIG_MD_SUPPORT_RAID1) || defined(CONFIG_MD_SUPPORT_RAID5)
683
684 int md_valid_device (int minor, kdev_t dev, int mode)
685 {
686 int i;
687
688 for (i=0; i<md_dev[minor].nb_dev; i++)
689 if (devices[minor][i].dev==dev)
690 break;
691
692 if (i>md_dev[minor].nb_dev)
693 {
694 printk ("Oops, dev %04x not found in md_valid_device\n", dev);
695 return -EINVAL;
696 }
697
698 switch (mode)
699 {
700 case VALID:
701
702
703
704 if (devices[minor][i].invalid==INVALID_ALWAYS)
705 {
706 devices[minor][i].fault_count=0;
707 if (md_dev[minor].invalid_dev_count)
708 md_dev[minor].invalid_dev_count--;
709 }
710 break;
711
712 case INVALID:
713 if (devices[minor][i].invalid != VALID )
714 return 0;
715
716 if (++devices[minor][i].fault_count > MAX_FAULT(md_dev+minor) &&
717 MAX_FAULT(md_dev+minor)!=0xFF)
718 {
719
720
721
722 mode=INVALID_ALWAYS;
723 md_dev[minor].invalid_dev_count++;
724 }
725 else
726
727
728
729
730
731 if (md_dev[minor].invalid_dev_count<=md_dev[minor].pers->max_invalid_dev)
732 mode=INVALID_NEXT;
733 else
734 mode=VALID;
735 break;
736
737 case INVALID_ALWAYS:
738 md_dev[minor].invalid_dev_count++;
739 }
740
741 devices[minor][i].invalid=mode;
742 return 0;
743 }
744
745
746 int md_can_reemit (int minor)
747 {
748
749
750
751
752
753
754
755 if (!md_dev[minor].pers)
756 return (0);
757
758 return(md_dev[minor].pers->max_invalid_dev &&
759 ((md_dev[minor].pers->max_invalid_dev==-1) ?
760 1 :
761 md_dev[minor].invalid_dev_count<=md_dev[minor].pers->max_invalid_dev));
762 }
763
764 #endif
765
766 int register_md_personality (int p_num, struct md_personality *p)
767 {
768 int i=(p_num >> PERSONALITY_SHIFT);
769
770 if (i >= MAX_PERSONALITY)
771 return -EINVAL;
772
773 if (pers[i])
774 return -EBUSY;
775
776 pers[i]=p;
777 printk ("%s personality registered\n", p->name);
778 return 0;
779 }
780
781 int unregister_md_personality (int p_num)
782 {
783 int i=(p_num >> PERSONALITY_SHIFT);
784
785 if (i >= MAX_PERSONALITY)
786 return -EINVAL;
787
788 printk ("%s personality unregistered\n", pers[i]->name);
789 pers[i]=NULL;
790 return 0;
791 }
792
/* Init hooks of the statically linked personalities. */
void linear_init (void);
void raid0_init (void);
void raid1_init (void);
void raid5_init (void);

/*
 * Driver entry point: register the md block device, hook up the
 * request function and the gendisk entry, then initialise every
 * compiled-in personality.  Returns 0 on success, -1 when the major
 * number cannot be obtained.
 */
int md_init (void)
{
  printk ("md driver %s MAX_MD_DEV=%d, MAX_REAL=%d\n", MD_VERSION, MAX_MD_DEV, MAX_REAL);

  if (register_blkdev (MD_MAJOR, "md", &md_fops))
  {
    printk ("Unable to get major %d for md\n", MD_MAJOR);
    return (-1);
  }

  blk_dev[MD_MAJOR].request_fn=DEVICE_REQUEST;
  blk_dev[MD_MAJOR].current_request=NULL;
  read_ahead[MD_MAJOR]=INT_MAX;	/* "not set"; lowered when arrays start (see set_ra) */
  md_gendisk.next=gendisk_head;	/* link ourselves into the gendisk chain */

  gendisk_head=&md_gendisk;

#ifdef CONFIG_MD_LINEAR
  linear_init ();
#endif
#ifdef CONFIG_MD_STRIPED
  raid0_init ();
#endif
#ifdef CONFIG_MD_RAID1
  raid1_init ();
#endif
#ifdef CONFIG_MD_RAID5
  raid5_init ();
#endif

  return (0);
}