This source file includes following definitions.
- find_gendisk
- partition_name
- set_ra
- md_ioctl
- md_open
- md_release
- remap_request
- do_md_request
- make_md_request
- md_geninit
- get_md_status
- md_valid_device
- md_can_reemit
- register_md_personality
- unregister_md_personality
- md_init
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22 #include <linux/config.h>
23 #include <linux/module.h>
24 #include <linux/version.h>
25 #include <linux/malloc.h>
26 #include <linux/mm.h>
27 #include <linux/md.h>
28 #include <linux/hdreg.h>
29 #include <linux/stat.h>
30 #include <linux/fs.h>
31 #include <linux/proc_fs.h>
32 #include <linux/blkdev.h>
33 #include <linux/genhd.h>
34 #ifdef CONFIG_KERNELD
35 #include <linux/kerneld.h>
36 #endif
37 #include <errno.h>
38
/* We are the block driver for MD_MAJOR; MD_DRIVER tailors blk.h macros. */
39 #define MAJOR_NR MD_MAJOR
40 #define MD_DRIVER
41
42 #include <linux/blk.h>
43
/* Presence flags exported so RAID personality modules can detect
   whether the core was built with RAID5/RAID1 support. */
44 #ifdef CONFIG_MD_SUPPORT_RAID5
45 int support_for_raid5;
46
47 #endif
48
49 #ifdef CONFIG_MD_SUPPORT_RAID1
50 int support_for_raid1;
51
52 #endif
53
/* Per-array partition table and per-array block size (bytes). */
54 static struct hd_struct md_hd_struct[MAX_MD_DEV];
55 static int md_blocksizes[MAX_MD_DEV];
56
/* Total size of each md array in 1K blocks (also used by md_gendisk). */
57 int md_size[MAX_MD_DEV]={0, };
58
59 static void md_geninit (struct gendisk *);
60
/* Generic-disk descriptor linked into gendisk_head by md_init().
   Positional initializers: major, name, minor_shift, max_p, max_nr,
   init, part, sizes, nr_real, real_devices, next. */
61 static struct gendisk md_gendisk=
62 {
63 MD_MAJOR,
64 "md",
65 0,
66 1,
67 MAX_MD_DEV,
68 md_geninit,
69 md_hd_struct,
70 md_size,
71 MAX_MD_DEV,
72 NULL,
73 NULL
74 };
75
/* Registered personalities (linear, raid0, ...), indexed by
   personality number >> PERSONALITY_SHIFT. */
76 static struct md_personality *pers[MAX_PERSONALITY]={NULL, };
77
/* devices[minor][n]: the real devices composing array `minor`;
   md_dev[minor]: per-array state.  Exported via the symbol table. */
78 struct real_dev devices[MAX_MD_DEV][MAX_REAL];
79 struct md_dev md_dev[MAX_MD_DEV];
80
81 static struct gendisk *find_gendisk (kdev_t dev)
82 {
83 struct gendisk *tmp=gendisk_head;
84
85 while (tmp != NULL)
86 {
87 if (tmp->major==MAJOR(dev))
88 return (tmp);
89
90 tmp=tmp->next;
91 }
92
93 return (NULL);
94 }
95
96
97
98 char *partition_name (kdev_t dev)
99 {
100 static char name[40];
101
102 struct gendisk *hd = find_gendisk (dev);
103
104 if (!hd)
105 {
106 printk ("No gendisk entry for dev %s\n", kdevname(dev));
107 sprintf (name, "dev %s", kdevname(dev));
108 return (name);
109 }
110
111 return disk_name (hd, MINOR(dev), name);
112 }
113
114
115 static void set_ra (void)
116 {
117 int i, j, minra=INT_MAX;
118
119 for (i=0; i<MAX_MD_DEV; i++)
120 {
121 if (!md_dev[i].pers)
122 continue;
123
124 for (j=0; j<md_dev[i].nb_dev; j++)
125 if (read_ahead[MAJOR(devices[i][j].dev)]<minra)
126 minra=read_ahead[MAJOR(devices[i][j].dev)];
127 }
128
129 read_ahead[MD_MAJOR]=minra;
130 }
131
132
/*
 * ioctl entry point for /dev/md*.  Root only.  Handles array
 * management (REGISTER_DEV / START_MD / STOP_MD, device
 * validation) plus the generic block ioctls (BLKGETSIZE,
 * BLKFLSBUF, BLKRASET/GET, HDIO_GETGEO, RO_IOCTLS).
 * Returns 0 on success or a negative errno.
 */
133 static int md_ioctl (struct inode *inode, struct file *file,
134 unsigned int cmd, unsigned long arg)
135 {
136 int minor, index, err, current_ra;
137 struct gendisk *gen_real;
138 struct hd_geometry *loc = (struct hd_geometry *) arg;
139 kdev_t dev;
140
141 if (!suser())
142 return -EACCES;
143
/* Minors with bit 7 set are personality-private: route the ioctl
   to the personality selected by the low 7 bits, if it has one. */
144 if (((minor=MINOR(inode->i_rdev)) & 0x80) &&
145 (minor & 0x7f) < MAX_PERSONALITY &&
146 pers[minor & 0x7f] &&
147 pers[minor & 0x7f]->ioctl)
148 return (pers[minor & 0x7f]->ioctl (inode, file, cmd, arg));
149
150 if (minor >= MAX_MD_DEV)
151 return -EINVAL;
152
153 switch (cmd)
154 {
/* Add real device `arg` to the (not yet started) array.  Rejected
   when the device is itself an md device, the array is full, the
   device is mounted, or the array is already running. */
155 case REGISTER_DEV:
156 dev=to_kdev_t ((dev_t) arg);
157 if (MAJOR(dev)==MD_MAJOR || md_dev[minor].nb_dev==MAX_REAL)
158 return -EINVAL;
159
160 if (!fs_may_mount (dev) || md_dev[minor].pers)
161 return -EBUSY;
162
163 if (!(gen_real=find_gendisk (dev)))
164 return -ENOENT;
165
166 index=md_dev[minor].nb_dev++;
167 devices[minor][index].dev=dev;
168
169
170
171
172
173
/* Pin an anonymous inode on the component so the block layer can
   address it; hash it so it behaves like a normal device inode. */
174 devices[minor][index].inode=get_empty_inode ();
175 devices[minor][index].inode->i_dev=dev;
176
177 insert_inode_hash (devices[minor][index].inode);
178
179
180
181
182
/* Size in 1K blocks, rounded down to a whole page; the offset of
   each component is the running sum of its predecessors. */
183 devices[minor][index].size=gen_real->sizes[MINOR(dev)] & ~((PAGE_SIZE >> 10)-1);
184 devices[minor][index].offset=index ?
185 (devices[minor][index-1].offset + devices[minor][index-1].size) : 0;
186
187 if (!index)
188 md_size[minor]=devices[minor][index].size;
189 else
190 md_size[minor]+=devices[minor][index].size;
191
192 printk("REGISTER_DEV %s to md%x done\n", partition_name(dev), minor);
193 break;
194
/* Start the array: pick the personality encoded in `arg`, run it,
   publish the size and recompute read-ahead. */
195 case START_MD:
196 if (!md_dev[minor].nb_dev)
197 return -EINVAL;
198
199 if (md_dev[minor].pers)
200 return -EBUSY;
201
202 md_dev[minor].repartition=(int) arg;
203
204 if ((index=PERSONALITY(md_dev+minor) >> (PERSONALITY_SHIFT))
205 >= MAX_PERSONALITY)
206 return -EINVAL;
207
/* Personality not registered: with kerneld, try to demand-load
   the module, then re-check. */
208 if (!pers[index])
209 {
210 #ifdef CONFIG_KERNELD
211 char module_name[80];
212 sprintf (module_name, "md-personality-%d", index);
213 request_module (module_name);
214 if (!pers[index])
215 #endif
216 return -EINVAL;
217 }
218
219 md_dev[minor].pers=pers[index];
220
221 if ((err=md_dev[minor].pers->run (minor, md_dev+minor)))
222 {
223 md_dev[minor].pers=NULL;
224 return (err);
225 }
226
227
228
229
/* nr_sects is in 512-byte sectors, md_size in 1K blocks. */
230 md_hd_struct[minor].start_sect=0;
231 md_hd_struct[minor].nr_sects=md_size[minor]<<1;
232
233
234
235
/* Lower the md read-ahead to the smallest of the components'. */
236 current_ra=read_ahead[MD_MAJOR];
237
238 for (index=0; index<md_dev[minor].nb_dev; index++)
239 {
240 if (current_ra>read_ahead[MAJOR(devices[minor][index].dev)])
241 current_ra=read_ahead[MAJOR(devices[minor][index].dev)];
242
243 devices[minor][index].fault_count=0;
244 devices[minor][index].invalid=VALID;
245 }
246
247 read_ahead[MD_MAJOR]=current_ra;
248
249 printk ("START_DEV md%x %s\n", minor, md_dev[minor].pers->name);
250 break;
251
/* Stop the array: refuse while other openers exist, flush, let the
   personality clean up, release the component inodes. */
252 case STOP_MD:
253 if (inode->i_count>1 || md_dev[minor].busy>1)
254 {
255 printk ("STOP_MD md%x failed : i_count=%d, busy=%d\n", minor, inode->i_count, md_dev[minor].busy);
256 return -EBUSY;
257 }
258
259 if (md_dev[minor].pers)
260 {
261
262 fsync_dev (inode->i_rdev);
263 invalidate_buffers (inode->i_rdev);
264 md_dev[minor].pers->stop (minor, md_dev+minor);
265 }
266
267
268 for (index=0; index<md_dev[minor].nb_dev; index++)
269 clear_inode (devices[minor][index].inode);
270
271 md_dev[minor].nb_dev=md_size[minor]=0;
272 md_dev[minor].pers=NULL;
273
274 set_ra ();
275
276 printk ("STOP_DEV md%x\n", minor);
277 break;
278
/* Manual device (in)validation, only meaningful with a redundant
   personality compiled in. */
279 #if defined(CONFIG_MD_SUPPORT_RAID1) || defined(CONFIG_MD_SUPPORT_RAID5)
280 case MD_INVALID:
281 dev=to_kdev_t ((dev_t) arg);
282 if (!(err=md_valid_device (minor, dev, INVALID_ALWAYS)))
283 printk ("md%d : %s disabled\n", minor, partition_name (dev));
284
285 return (err);
286
287 case MD_VALID:
288 dev=to_kdev_t ((dev_t) arg);
289 if (!(err=md_valid_device (minor, dev, VALID)))
290 printk ("md%d : %s enabled\n", minor, partition_name (dev));
291
292 return (err);
293 #endif
294
/* Array size in 512-byte sectors, copied to user space. */
295 case BLKGETSIZE:
296 if (!arg) return -EINVAL;
297 err=verify_area (VERIFY_WRITE, (long *) arg, sizeof(long));
298 if (err)
299 return err;
300 put_user (md_hd_struct[MINOR(inode->i_rdev)].nr_sects, (long *) arg);
301 break;
302
303 case BLKFLSBUF:
304 fsync_dev (inode->i_rdev);
305 invalidate_buffers (inode->i_rdev);
306 break;
307
308 case BLKRASET:
309 if (arg > 0xff)
310 return -EINVAL;
311 read_ahead[MAJOR(inode->i_rdev)] = arg;
312 return 0;
313
314 case BLKRAGET:
315 if (!arg) return -EINVAL;
316 err=verify_area (VERIFY_WRITE, (long *) arg, sizeof(long));
317 if (err)
318 return err;
319 put_user (read_ahead[MAJOR(inode->i_rdev)], (long *) arg);
320 break;
321
/* Fake geometry (2 heads, 4 sectors) so fdisk-style tools work. */
322 case HDIO_GETGEO:
323 if (!loc) return -EINVAL;
324 err = verify_area(VERIFY_WRITE, loc, sizeof(*loc));
325 if (err)
326 return err;
327 put_user (2, (char *) &loc->heads);
328 put_user (4, (char *) &loc->sectors);
329 put_user (md_hd_struct[minor].nr_sects/8, (short *) &loc->cylinders);
330 put_user (md_hd_struct[MINOR(inode->i_rdev)].start_sect,
331 (long *) &loc->start);
332 break;
333
/* Expands to the standard read-only flag ioctl cases. */
334 RO_IOCTLS(inode->i_rdev,arg);
335
336 default:
337 printk ("Unknown md_ioctl %d\n", cmd);
338 return -EINVAL;
339 }
340
341 return (0);
342 }
343
344
345 static int md_open (struct inode *inode, struct file *file)
346 {
347 int minor=MINOR(inode->i_rdev);
348
349 md_dev[minor].busy++;
350 return (0);
351 }
352
353
354 static void md_release (struct inode *inode, struct file *file)
355 {
356 int minor=MINOR(inode->i_rdev);
357
358 sync_dev (inode->i_rdev);
359 md_dev[minor].busy--;
360 }
361
362
/* File operations for /dev/md* (positional initializers). */
363 static struct file_operations md_fops=
364 {
365 NULL,
/* read / write go through the generic block-buffer routines. */
366 block_read,
367 block_write,
368 NULL,
369 NULL,
370 md_ioctl,
371 NULL,
372 md_open,
373 md_release,
374 block_fsync
375 };
376
377
378 static inline int remap_request (int minor, struct request *req)
379 {
380 if (!md_dev[minor].pers)
381 {
382 printk ("Oops ! md%d not running, giving up !\n", minor);
383 return -1;
384 }
385
386 return (md_dev[minor].pers->map(minor, md_dev+minor, req));
387 }
388
389 static void do_md_request (void)
390 {
391 int minor;
392 struct request *req;
393
394 while (1)
395 {
396 #ifdef MD_COUNT_SIZE
397 int reqsize, chunksize;
398 #endif
399
400 cli ();
401 req = blk_dev[MD_MAJOR].current_request;
402 if (!req || (req->rq_status == RQ_INACTIVE))
403 {
404 sti ();
405 return;
406 }
407
408 #ifdef MD_COUNT_SIZE
409 reqsize=req->nr_sectors>>1;
410 chunksize=1 << FACTOR_SHIFT(FACTOR(md_dev+MINOR(req->rq_dev)));
411 if (reqsize==chunksize) (md_dev+MINOR(req->rq_dev))->equal_count++;
412 if (reqsize<chunksize) (md_dev+MINOR(req->rq_dev))->smallest_count++;
413 if (reqsize>chunksize) (md_dev+MINOR(req->rq_dev))->biggest_count++;
414 #endif
415
416 blk_dev[MD_MAJOR].current_request = req->next;
417 sti ();
418
419 minor = MINOR(req->rq_dev);
420 if ((MAJOR(req->rq_dev) != MD_MAJOR) || (minor >= MAX_REAL))
421 {
422 printk("md: bad device: %s\n", kdevname(req->rq_dev));
423 end_request(0, req);
424 continue;
425 }
426
427 switch (remap_request (minor, req))
428 {
429 case REDIRECTED_BHREQ:
430 req->rq_status=RQ_INACTIVE;
431 wake_up (&wait_for_request);
432 break;
433
434 case REDIRECTED_REQ:
435 break;
436
437 case REDIRECT_FAILED:
438 end_request (0, req);
439 break;
440
441 default:
442 printk ("remap_request returned strange value !\n");
443 }
444 }
445 }
446
447 extern struct semaphore request_lock;
448
/*
 * Submit `n` pre-built requests (from a personality's remapping) to
 * the real devices.  For IDE/SCSI targets we first try to merge each
 * pending request into an already-queued one (front or back merge);
 * otherwise a fresh request is allocated and queued.  Runs under
 * request_lock with interrupts disabled, temporarily dropping both
 * around the blocking get_md_request/add_request pair.
 */
449 void make_md_request (struct request *pending, int n)
450 {
451 int i, j, max_req, major=0, rw, found;
452 kdev_t dev;
453 struct buffer_head *bh;
454 struct request *req;
455 long flags;
456
457 down (&request_lock);
458 save_flags (flags);
459 cli();
460
461 for (i=0; i<n; i++)
462 {
/* Empty slot: nothing pending here. */
463 if (!pending[i].bh)
464 continue;
465
466 found=0;
467 rw=pending[i].cmd;
468 bh=pending[i].bh;
469 major=MAJOR(dev=pending[i].rq_dev);
470
/* Writes may only consume 2/3 of the request slots so reads
   cannot be starved. */
471 max_req = (rw == READ) ? NR_REQUEST : ((NR_REQUEST*2)/3);
472
/* Only attempt merging on drivers known to cope with it. */
473 if (( major == IDE0_MAJOR
474 || major == IDE1_MAJOR
475 || major == SCSI_DISK_MAJOR
476 || major == IDE2_MAJOR
477 || major == IDE3_MAJOR)
478 && (req = blk_dev[major].current_request))
479 {
480
481
482
483
/* For IDE the head request is active in the controller; skip it.
   SCSI dequeues active requests, so the head is fair game. */
484 if (major != SCSI_DISK_MAJOR)
485 req = req->next;
486
487 while (req && !found)
488 {
489 if (req->rq_status!=RQ_ACTIVE)
490 printk ("Saw bad status request !\n");
491
/* Back merge: pending starts right where `req` ends (245-sector
   cap keeps the merged request within driver limits). */
492 if (req->rq_dev == dev &&
493 !req->sem &&
494 req->cmd == rw &&
495 req->sector + req->nr_sectors == pending[i].sector &&
496 (req->nr_sectors + pending[i].nr_sectors) < 245)
497 {
498 req->bhtail->b_reqnext = bh;
499 req->bhtail = pending[i].bhtail;
500 req->nr_sectors += pending[i].nr_sectors;
501 found=1;
502 continue;
503 }
504
/* Front merge: pending ends right where `req` starts; the request
   head (sector, buffer, bh) must be rewritten. */
505 if (!found &&
506 req->rq_dev == dev &&
507 !req->sem &&
508 req->cmd == rw &&
509 req->sector - pending[i].nr_sectors == pending[i].sector &&
510 (req->nr_sectors + pending[i].nr_sectors) < 245)
511 {
512 req->nr_sectors += pending[i].nr_sectors;
513 pending[i].bhtail->b_reqnext = req->bh;
514 req->buffer = bh->b_data;
515 req->current_nr_sectors = bh->b_size >> 9;
516 req->sector = pending[i].sector;
517 req->bh = bh;
518 found=1;
519 continue;
520 }
521
522 req = req->next;
523 }
524 }
525
526 if (found)
527 continue;
528
/* No merge possible: allocate a request slot (may sleep), so drop
   the lock and re-enable interrupts first. */
529 up (&request_lock);
530 sti ();
531 req=get_md_request (max_req, dev);
532
533
534 req->cmd = rw;
535 req->errors = 0;
536 #if defined (CONFIG_MD_SUPPORT_RAID1)
537 req->shared_count = 0;
538 #endif
539 req->sector = pending[i].sector;
540 req->nr_sectors = pending[i].nr_sectors;
541 req->current_nr_sectors = bh->b_size >> 9;
542 req->buffer = bh->b_data;
543 req->sem = NULL;
544 req->bh = bh;
545 req->bhtail = pending[i].bhtail;
546 req->next = NULL;
547
548 add_request (blk_dev + MAJOR(dev), req);
/* Re-take the lock before examining the next pending slot. */
549 down (&request_lock);
550 cli ();
551 }
552
553 up (&request_lock);
554 restore_flags (flags);
/* Mark every slot consumed so the caller can reuse the array. */
555 for (j=0; j<n; j++)
556 {
557 if (!pending[j].bh)
558 continue;
559
560 pending[j].bh=NULL;
561 }
562 }
563
564
/* Symbols exported to the personality modules (registered from
   md_geninit via register_symtab). */
565 static struct symbol_table md_symbol_table=
566 {
567 #include <linux/symtab_begin.h>
568
569 X(devices),
570 X(md_size),
571 X(add_request),
572 X(make_md_request),
573
574 #ifdef CONFIG_MD_SUPPORT_RAID1
575 X(support_for_raid1),
576 #endif
577
578 #ifdef CONFIG_MD_SUPPORT_RAID5
579 X(support_for_raid5),
580 #endif
581
582 X(register_md_personality),
583 X(unregister_md_personality),
584 X(partition_name),
585
/* Fault-handling helpers only exist with a redundant personality. */
586 #if defined(CONFIG_MD_SUPPORT_RAID1) || defined(CONFIG_MD_SUPPORT_RAID5)
587 X(md_valid_device),
588 X(md_can_reemit),
589 #endif
590
591 #include <linux/symtab_end.h>
592 };
593
594
/*
 * Gendisk init callback: reset per-array state, publish block sizes,
 * export our symbols and register /proc/mdstat.
 */
595 static void md_geninit (struct gendisk *gdisk)
596 {
597 int i;
598
599 for(i=0;i<MAX_MD_DEV;i++)
600 {
601 md_blocksizes[i] = 1024;
/* start_sect == -1 marks the partition entry as unused. */
602 md_gendisk.part[i].start_sect=-1;
603 md_dev[i].pers=NULL;
604 #ifdef MD_COUNT_SIZES
605 md_dev[i].smallest_count=md_dev[i].biggest_count=md_dev[i].equal_count=0;
606 #endif
607 }
608
609 blksize_size[MAJOR_NR] = md_blocksizes;
610 register_symtab (&md_symbol_table);
611
/* Anonymous proc entry: /proc/mdstat (name length 6), world readable. */
612 proc_register(&proc_root,
613 &(struct proc_dir_entry)
614 {
615 PROC_MD, 6, "mdstat",
616 S_IFREG | S_IRUGO, 1, 0, 0,
617 });
618 }
619
620
/*
 * Fill `page` with the /proc/mdstat report: registered personalities,
 * current read-ahead, then one line per array (state, personality,
 * component devices, size, fault limit, chunk size and each
 * personality's own status line).  Returns the number of bytes
 * written.
 */
621 int get_md_status (char *page)
622 {
623 int sz=0, i, j;
624
625 sz+=sprintf( page+sz, "Personalities : ");
626 for (i=0; i<MAX_PERSONALITY; i++)
627 if (pers[i])
628 sz+=sprintf (page+sz, "[%d %s] ", i, pers[i]->name);
629
/* Replace the trailing space (or the one after the colon when no
   personality is registered) with a newline. */
630 page[sz-1]='\n';
631
632 sz+=sprintf (page+sz, "read_ahead ");
/* INT_MAX is the sentinel set by md_init/set_ra for "no array". */
633 if (read_ahead[MD_MAJOR]==INT_MAX)
634 sz+=sprintf (page+sz, "not set\n");
635 else
636 sz+=sprintf (page+sz, "%d sectors\n", read_ahead[MD_MAJOR]);
637
638 for (i=0; i<MAX_MD_DEV; i++)
639 {
640 sz+=sprintf (page+sz, "md%d : %sactive", i, md_dev[i].pers ? "" : "in");
641
642 if (md_dev[i].pers)
643 sz+=sprintf (page+sz, " %s", md_dev[i].pers->name);
644
/* Components: invalid devices are shown in parentheses. */
645 for (j=0; j<md_dev[i].nb_dev; j++)
646 sz+=sprintf (page+sz, " %s%s%s",
647 (devices[i][j].invalid==VALID) ? "" : "(",
648 partition_name(devices[i][j].dev),
649 (devices[i][j].invalid==VALID) ? "" : ")");
650
651 if (md_dev[i].nb_dev)
652 sz+=sprintf (page+sz, " %d blocks", md_size[i]);
653
654 if (!md_dev[i].pers)
655 {
656 sz+=sprintf (page+sz, "\n");
657 continue;
658 }
659
660 if (md_dev[i].pers->max_invalid_dev)
661 sz+=sprintf (page+sz, " maxfault=%ld", MAX_FAULT(md_dev+i));
662
/* Chunk size is meaningless for the linear personality. */
663 if (md_dev[i].pers != pers[(LINEAR>>PERSONALITY_SHIFT)])
664 {
665 sz+=sprintf (page+sz, " %dk chunks", 1<<FACTOR_SHIFT(FACTOR(md_dev+i)));
666 #ifdef MD_COUNT_SIZES
/* Requests seen smaller than / equal to / bigger than chunk size. */
667 sz+=sprintf (page+sz, " (%d/%d/%d)",
668 md_dev[i].smallest_count,
669 md_dev[i].equal_count,
670 md_dev[i].biggest_count);
671 #endif
672 }
673 sz+=sprintf (page+sz, "\n");
674 sz+=md_dev[i].pers->status (page+sz, i, md_dev+i);
675 }
676
677 return (sz);
678 }
679
680 #if defined(CONFIG_MD_SUPPORT_RAID1) || defined(CONFIG_MD_SUPPORT_RAID5)
681
682 int md_valid_device (int minor, kdev_t dev, int mode)
683 {
684 int i;
685
686 for (i=0; i<md_dev[minor].nb_dev; i++)
687 if (devices[minor][i].dev==dev)
688 break;
689
690 if (i>md_dev[minor].nb_dev)
691 {
692 printk ("Oops, dev %04x not found in md_valid_device\n", dev);
693 return -EINVAL;
694 }
695
696 switch (mode)
697 {
698 case VALID:
699
700
701
702 if (devices[minor][i].invalid==INVALID_ALWAYS)
703 {
704 devices[minor][i].fault_count=0;
705 if (md_dev[minor].invalid_dev_count)
706 md_dev[minor].invalid_dev_count--;
707 }
708 break;
709
710 case INVALID:
711 if (devices[minor][i].invalid != VALID )
712 return 0;
713
714 if (++devices[minor][i].fault_count > MAX_FAULT(md_dev+minor) &&
715 MAX_FAULT(md_dev+minor)!=0xFF)
716 {
717
718
719
720 mode=INVALID_ALWAYS;
721 md_dev[minor].invalid_dev_count++;
722 }
723 else
724
725
726
727
728
729 if (md_dev[minor].invalid_dev_count<=md_dev[minor].pers->max_invalid_dev)
730 mode=INVALID_NEXT;
731 else
732 mode=VALID;
733 break;
734
735 case INVALID_ALWAYS:
736 md_dev[minor].invalid_dev_count++;
737 }
738
739 devices[minor][i].invalid=mode;
740 return 0;
741 }
742
743
744 int md_can_reemit (int minor)
745 {
746
747
748
749
750
751
752
753 if (!md_dev[minor].pers)
754 return (0);
755
756 return(md_dev[minor].pers->max_invalid_dev &&
757 ((md_dev[minor].pers->max_invalid_dev==-1) ?
758 1 :
759 md_dev[minor].invalid_dev_count<=md_dev[minor].pers->max_invalid_dev));
760 }
761
762 #endif
763
764 int register_md_personality (int p_num, struct md_personality *p)
765 {
766 int i=(p_num >> PERSONALITY_SHIFT);
767
768 if (i >= MAX_PERSONALITY)
769 return -EINVAL;
770
771 if (pers[i])
772 return -EBUSY;
773
774 pers[i]=p;
775 printk ("%s personality registered\n", p->name);
776 return 0;
777 }
778
779 int unregister_md_personality (int p_num)
780 {
781 int i=(p_num >> PERSONALITY_SHIFT);
782
783 if (i >= MAX_PERSONALITY)
784 return -EINVAL;
785
786 printk ("%s personality unregistered\n", pers[i]->name);
787 pers[i]=NULL;
788 return 0;
789 }
790
791 void linear_init (void);
792 void raid0_init (void);
793 void raid1_init (void);
794 void raid5_init (void);
795
796 int md_init (void)
797 {
798 printk ("md driver %s MAX_MD_DEV=%d, MAX_REAL=%d\n", MD_VERSION, MAX_MD_DEV, MAX_REAL);
799
800 if (register_blkdev (MD_MAJOR, "md", &md_fops))
801 {
802 printk ("Unable to get major %d for md\n", MD_MAJOR);
803 return (-1);
804 }
805
806 blk_dev[MD_MAJOR].request_fn=DEVICE_REQUEST;
807 blk_dev[MD_MAJOR].current_request=NULL;
808 read_ahead[MD_MAJOR]=INT_MAX;
809 md_gendisk.next=gendisk_head;
810
811 gendisk_head=&md_gendisk;
812
813 #ifdef CONFIG_MD_LINEAR
814 linear_init ();
815 #endif
816 #ifdef CONFIG_MD_STRIPED
817 raid0_init ();
818 #endif
819 #ifdef CONFIG_MD_RAID1
820 raid1_init ();
821 #endif
822 #ifdef CONFIG_MD_RAID5
823 raid5_init ();
824 #endif
825
826 return (0);
827 }