This source file includes following definitions.
- find_gendisk
- partition_name
- set_ra
- md_ioctl
- md_open
- md_release
- remap_request
- do_md_request
- make_md_request
- md_geninit
- get_md_status
- md_valid_device
- md_can_reemit
- register_md_personality
- unregister_md_personality
- md_init
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20 #include <linux/config.h>
21 #include <linux/module.h>
22 #include <linux/version.h>
23 #include <linux/malloc.h>
24 #include <linux/mm.h>
25 #include <linux/md.h>
26 #include <linux/hdreg.h>
27 #include <linux/stat.h>
28 #include <linux/fs.h>
29 #include <linux/proc_fs.h>
30 #include <linux/blkdev.h>
31 #include <errno.h>
32
33 #define MAJOR_NR MD_MAJOR
34 #define MD_DRIVER
35
36 #include <linux/blk.h>
37
/* Flags advertising optional RAID support; exported through the symbol
   table below so personality modules can test for them at load time. */
#ifdef CONFIG_MD_SUPPORT_RAID5
int support_for_raid5;

#endif

#ifdef CONFIG_MD_SUPPORT_RAID1
int support_for_raid1;

#endif

/* One pseudo-partition entry and one block size per md device. */
static struct hd_struct md_hd_struct[MAX_MD_DEV];
static int md_blocksizes[MAX_MD_DEV];

/* Total size of each md array in KB (serves as the gendisk sizes[]). */
int md_size[MAX_MD_DEV]={0, };

static void md_geninit (struct gendisk *);

/* NOTE(review): positional initializer for the 2.0-era struct gendisk;
   field meanings below inferred from that layout -- confirm against
   <linux/genhd.h> for this tree. */
static struct gendisk md_gendisk=
{
  MD_MAJOR,     /* major number */
  "md",         /* major name */
  0,            /* minor shift: md devices have no sub-partitions */
  1,            /* max partitions per unit */
  MAX_MD_DEV,   /* maximum number of units */
  md_geninit,   /* init function */
  md_hd_struct, /* partition table */
  md_size,      /* device sizes */
  MAX_MD_DEV,   /* number of real units */
  NULL,         /* internal use */
  NULL          /* next gendisk in chain (set in md_init) */
};

/* Registered personalities (linear, raid0, ...), indexed by
   personality number. */
static struct md_personality *pers[MAX_PERSONALITY]={NULL, };

/* Per-array table of underlying real devices, and per-array state. */
struct real_dev devices[MAX_MD_DEV][MAX_REAL];
struct md_dev md_dev[MAX_MD_DEV];
74
75 static struct gendisk *find_gendisk (kdev_t dev)
76 {
77 struct gendisk *tmp=gendisk_head;
78
79 while (tmp != NULL)
80 {
81 if (tmp->major==MAJOR(dev))
82 return (tmp);
83
84 tmp=tmp->next;
85 }
86
87 return (NULL);
88 }
89
90
91
92 char *partition_name (kdev_t dev)
93 {
94 static char name[10];
95
96 struct gendisk *hd=find_gendisk (dev);
97 char base_name;
98 int minor=MINOR(dev);
99
100 if (!hd)
101 {
102 printk ("No gendisk entry for dev %04x\n", dev);
103 sprintf (name, "dev %04x", dev);
104 return (name);
105 }
106
107 base_name = (hd->major == IDE1_MAJOR) ? 'c' : 'a';
108 sprintf(name, "%s%c%d",
109 hd->major_name,
110 base_name + (minor >> hd->minor_shift),
111 minor & ((1 << hd->minor_shift) - 1));
112 return (name);
113 }
114
115
116 static void set_ra (void)
117 {
118 int i, j, minra=INT_MAX;
119
120 for (i=0; i<MAX_MD_DEV; i++)
121 {
122 if (!md_dev[i].pers)
123 continue;
124
125 for (j=0; j<md_dev[i].nb_dev; j++)
126 if (read_ahead[MAJOR(devices[i][j].dev)]<minra)
127 minra=read_ahead[MAJOR(devices[i][j].dev)];
128 }
129
130 read_ahead[MD_MAJOR]=minra;
131 }
132
133
/*
 * ioctl handler for /dev/md*.  Root only.  Handles array assembly and
 * teardown (REGISTER_DEV / START_MD / STOP_MD), device validity control
 * for the RAID1/RAID5 personalities, and the generic block-device
 * ioctls (geometry, size, read-ahead, flush).
 */
static int md_ioctl (struct inode *inode, struct file *file,
                     unsigned int cmd, unsigned long arg)
{
  int minor, index, err, current_ra;
  struct gendisk *gen_real;
  struct hd_geometry *loc = (struct hd_geometry *) arg;
  kdev_t dev;

  if (!suser())
    return -EACCES;

  /* Minors with bit 7 set are routed to the personality's own ioctl
     handler; the personality number sits in the low 7 bits. */
  if (((minor=MINOR(inode->i_rdev)) & 0x80) &&
      (minor & 0x7f) < MAX_PERSONALITY &&
      pers[minor & 0x7f] &&
      pers[minor & 0x7f]->ioctl)
    return (pers[minor & 0x7f]->ioctl (inode, file, cmd, arg));

  if (minor >= MAX_MD_DEV)
    return -EINVAL;

  switch (cmd)
  {
    case REGISTER_DEV:
      /* Add one real device to a not-yet-started array. */
      dev=to_kdev_t ((dev_t) arg);
      if (MAJOR(dev)==MD_MAJOR || md_dev[minor].nb_dev==MAX_REAL)
        return -EINVAL;

      /* Refuse mounted devices and refuse changes to running arrays. */
      if (!fs_may_mount (dev) || md_dev[minor].pers)
        return -EBUSY;

      if (!(gen_real=find_gendisk (dev)))
        return -ENOENT;

      index=md_dev[minor].nb_dev++;
      devices[minor][index].dev=dev;

      /* Pin an inode for the real device so it cannot disappear while
         it is part of the array. */
      devices[minor][index].inode=get_empty_inode ();
      devices[minor][index].inode->i_dev=dev;

      insert_inode_hash (devices[minor][index].inode);

      /* Sizes (in KB) are rounded down to a page multiple; member
         devices are laid out back to back. */
      devices[minor][index].size=gen_real->sizes[MINOR(dev)] & (PAGE_MASK>>10);
      devices[minor][index].offset=index ?
        (devices[minor][index-1].offset + devices[minor][index-1].size) : 0;

      if (!index)
        md_size[minor]=devices[minor][index].size;
      else
        md_size[minor]+=devices[minor][index].size;

      printk("REGISTER_DEV %s to md%x done\n", partition_name(dev), minor);
      break;

    case START_MD:
      /* Start the array: arg encodes the personality and chunk size. */
      if (!md_dev[minor].nb_dev)
        return -EINVAL;

      if (md_dev[minor].pers)
        return -EBUSY;

      md_dev[minor].repartition=(int) arg;

      if ((index=PERSONALITY(md_dev+minor) >> (PERSONALITY_SHIFT))
          >= MAX_PERSONALITY ||
          !pers[index])
        return -EINVAL;

      md_dev[minor].pers=pers[index];

      if ((err=md_dev[minor].pers->run (minor, md_dev+minor)))
      {
        md_dev[minor].pers=NULL;
        return (err);
      }

      /* md_size is in KB; nr_sects is in 512-byte sectors. */
      md_hd_struct[minor].start_sect=0;
      md_hd_struct[minor].nr_sects=md_size[minor]<<1;

      /* Drop the md read-ahead to the smallest read-ahead of the
         member devices, and reset the per-device fault state. */
      current_ra=read_ahead[MD_MAJOR];

      for (index=0; index<md_dev[minor].nb_dev; index++)
      {
        if (current_ra>read_ahead[MAJOR(devices[minor][index].dev)])
          current_ra=read_ahead[MAJOR(devices[minor][index].dev)];

        devices[minor][index].fault_count=0;
        devices[minor][index].invalid=VALID;
      }

      read_ahead[MD_MAJOR]=current_ra;

      printk ("START_DEV md%x %s\n", minor, md_dev[minor].pers->name);
      break;

    case STOP_MD:
      /* Refuse to stop while anyone else holds the device open. */
      if (inode->i_count>1 || md_dev[minor].busy>1)
      {
        printk ("STOP_MD md%x failed : i_count=%d, busy=%d\n", minor, inode->i_count, md_dev[minor].busy);
        return -EBUSY;
      }

      if (md_dev[minor].pers)
      {
        /* Flush and drop cached buffers before shutting down. */
        fsync_dev (inode->i_rdev);
        invalidate_buffers (inode->i_rdev);
        md_dev[minor].pers->stop (minor, md_dev+minor);
      }

      /* Release the inodes pinned by REGISTER_DEV. */
      for (index=0; index<md_dev[minor].nb_dev; index++)
        clear_inode (devices[minor][index].inode);

      md_dev[minor].nb_dev=md_size[minor]=0;
      md_dev[minor].pers=NULL;

      set_ra ();

      printk ("STOP_DEV md%x\n", minor);
      break;

#if defined(CONFIG_MD_SUPPORT_RAID1) || defined(CONFIG_MD_SUPPORT_RAID5)
    case MD_INVALID:
      /* Administratively disable one member device. */
      dev=to_kdev_t ((dev_t) arg);
      if (!(err=md_valid_device (minor, dev, INVALID_ALWAYS)))
        printk ("md%d : %s disabled\n", minor, partition_name (dev));

      return (err);

    case MD_VALID:
      /* Re-enable a previously disabled member device. */
      dev=to_kdev_t ((dev_t) arg);
      if (!(err=md_valid_device (minor, dev, VALID)))
        printk ("md%d : %s enabled\n", minor, partition_name (dev));

      return (err);
#endif

    case BLKGETSIZE:
      /* Return the array size in 512-byte sectors. */
      if (!arg) return -EINVAL;
      err=verify_area (VERIFY_WRITE, (long *) arg, sizeof(long));
      if (err)
        return err;
      put_user (md_hd_struct[MINOR(inode->i_rdev)].nr_sects, (long *) arg);
      break;

    case BLKFLSBUF:
      fsync_dev (inode->i_rdev);
      invalidate_buffers (inode->i_rdev);
      break;

    case BLKRASET:
      if (arg > 0xff)
        return -EINVAL;
      read_ahead[MAJOR(inode->i_rdev)] = arg;
      return 0;

    case BLKRAGET:
      if (!arg) return -EINVAL;
      err=verify_area (VERIFY_WRITE, (long *) arg, sizeof(long));
      if (err)
        return err;
      put_user (read_ahead[MAJOR(inode->i_rdev)], (long *) arg);
      break;

    case HDIO_GETGEO:
      /* Fake a geometry: 2 heads, 4 sectors per track. */
      if (!loc) return -EINVAL;
      err = verify_area(VERIFY_WRITE, loc, sizeof(*loc));
      if (err)
        return err;
      put_user (2, (char *) &loc->heads);
      put_user (4, (char *) &loc->sectors);
      put_user (md_hd_struct[minor].nr_sects/8, (short *) &loc->cylinders);
      put_user (md_hd_struct[MINOR(inode->i_rdev)].start_sect,
                (long *) &loc->start);
      break;

    /* Generic read-only handling (BLKROSET/BLKROGET). */
    RO_IOCTLS(inode->i_rdev,arg);

    default:
      printk ("Unknown md_ioctl %d\n", cmd);
      return -EINVAL;
  }

  return (0);
}
334
335
336 static int md_open (struct inode *inode, struct file *file)
337 {
338 int minor=MINOR(inode->i_rdev);
339
340 md_dev[minor].busy++;
341 return (0);
342 }
343
344
345 static void md_release (struct inode *inode, struct file *file)
346 {
347 int minor=MINOR(inode->i_rdev);
348
349 sync_dev (inode->i_rdev);
350 md_dev[minor].busy--;
351 }
352
353
/* File operations for /dev/md*: generic block-device read/write plus
   the md-specific ioctl/open/release handlers defined above. */
static struct file_operations md_fops=
{
  NULL,        /* lseek  - default */
  block_read,  /* read */
  block_write, /* write */
  NULL,        /* readdir */
  NULL,        /* select */
  md_ioctl,    /* ioctl */
  NULL,        /* mmap */
  md_open,     /* open */
  md_release,  /* release */
  block_fsync  /* fsync */
};
367
368
369 static inline int remap_request (int minor, struct request *req)
370 {
371 if (!md_dev[minor].pers)
372 {
373 printk ("Oops ! md%d not running, giving up !\n", minor);
374 return -1;
375 }
376
377 return (md_dev[minor].pers->map(minor, md_dev+minor, req));
378 }
379
380 static void do_md_request (void)
381 {
382 int minor;
383 struct request *req;
384
385 while (1)
386 {
387 #ifdef MD_COUNT_SIZE
388 int reqsize, chunksize;
389 #endif
390
391 cli ();
392 req = blk_dev[MD_MAJOR].current_request;
393 if (!req || (req->rq_status == RQ_INACTIVE))
394 {
395 sti ();
396 return;
397 }
398
399 #ifdef MD_COUNT_SIZE
400 reqsize=req->nr_sectors>>1;
401 chunksize=1 << FACTOR_SHIFT(FACTOR(md_dev+MINOR(req->rq_dev)));
402 if (reqsize==chunksize) (md_dev+MINOR(req->rq_dev))->equal_count++;
403 if (reqsize<chunksize) (md_dev+MINOR(req->rq_dev))->smallest_count++;
404 if (reqsize>chunksize) (md_dev+MINOR(req->rq_dev))->biggest_count++;
405 #endif
406
407 blk_dev[MD_MAJOR].current_request = req->next;
408 sti ();
409
410 minor = MINOR(req->rq_dev);
411 if ((MAJOR(req->rq_dev) != MD_MAJOR) || (minor >= MAX_REAL))
412 {
413 printk("md: bad device number: 0x%04x\n", req->rq_dev);
414 end_request(0, req);
415 continue;
416 }
417
418 switch (remap_request (minor, req))
419 {
420 case REDIRECTED_BHREQ:
421 req->rq_status=RQ_INACTIVE;
422 wake_up (&wait_for_request);
423 break;
424
425 case REDIRECTED_REQ:
426 break;
427
428 case REDIRECT_FAILED:
429 end_request (0, req);
430 break;
431
432 default:
433 printk ("remap_request returned strange value !\n");
434 }
435 }
436 }
437
438 extern struct semaphore request_lock;
439
/*
 * Queue a batch of n remapped requests (built by a personality into
 * the pending[] array) on the real devices' queues, merging with
 * already-queued requests where possible.  Protected by request_lock
 * plus cli() around queue manipulation.
 */
void make_md_request (struct request *pending, int n)
{
  int i, j, max_req, major=0, rw, found;
  kdev_t dev;
  struct buffer_head *bh;
  struct request *req;

  down (&request_lock);

  for (i=0; i<n; i++)
  {
    if (!pending[i].bh)
      continue;

    cli();

    found=0;
    rw=pending[i].cmd;
    bh=pending[i].bh;
    major=MAJOR(dev=pending[i].rq_dev);

    /* Reads may use the whole request table; writes only two thirds,
       so reads cannot be starved by a flood of writes. */
    max_req = (rw == READ) ? NR_REQUEST : ((NR_REQUEST*2)/3);

    /* Only attempt merging on disk drivers with ordered queues. */
    if (( major == IDE0_MAJOR
       || major == IDE1_MAJOR
       || major == SCSI_DISK_MAJOR
       || major == IDE2_MAJOR
       || major == IDE3_MAJOR)
        && (req = blk_dev[major].current_request))
    {
      /* NOTE(review): HD_MAJOR is not in the major list above, so
         this branch looks unreachable -- confirm before relying on
         it.  (Skips the in-flight head request on the hd driver.) */
#ifdef CONFIG_BLK_DEV_HD
      if (major == HD_MAJOR)
        req = req->next;
#endif CONFIG_BLK_DEV_HD

      while (req && !found)
      {
        if (req->rq_status!=RQ_INACTIVE && req->rq_status!=RQ_ACTIVE)
          printk ("Saw bad status request !\n");

        /* Back-merge: our buffers continue an existing request.
           245 bounds the merged size; req->sem set means someone is
           sleeping on the request, so leave it alone. */
        if (req->rq_dev == dev &&
            !req->sem &&
            req->cmd == rw &&
            req->sector + req->nr_sectors == pending[i].sector &&
            (req->nr_sectors + pending[i].nr_sectors) < 245)
        {
          req->bhtail->b_reqnext = bh;
          req->bhtail = pending[i].bhtail;
          req->nr_sectors += pending[i].nr_sectors;
          found=1;
          continue;
        }

        /* Front-merge: our buffers end exactly where an existing
           request begins, so prepend them. */
        if (!found &&
            req->rq_dev == dev &&
            !req->sem &&
            req->cmd == rw &&
            req->sector - pending[i].nr_sectors == pending[i].sector &&
            (req->nr_sectors + pending[i].nr_sectors) < 245)
        {
          req->nr_sectors += pending[i].nr_sectors;
          bh->b_reqnext = req->bh;
          req->buffer = bh->b_data;
          req->current_nr_sectors = bh->b_size >> 9;
          req->sector = pending[i].sector;
          req->bh = bh;
          found=1;
          continue;
        }

        req = req->next;
      }
    }

    if (found)
      continue;

    /* No merge possible: allocate a fresh request.  get_md_request()
       may sleep, so the lock is dropped around it. */
    up (&request_lock);
    req=get_md_request (max_req, dev);

    /* Fill in the new request from the pending entry. */
    req->cmd = rw;
    req->errors = 0;
#if defined (CONFIG_MD_SUPPORT_RAID1)
    req->shared_count = 0;
#endif
    req->sector = pending[i].sector;
    req->nr_sectors = pending[i].nr_sectors;
    req->current_nr_sectors = bh->b_size >> 9;
    req->buffer = bh->b_data;
    req->sem = NULL;
    req->bh = bh;
    req->bhtail = pending[i].bhtail;
    req->next = NULL;

    add_request (blk_dev + MAJOR(dev), req);
    down (&request_lock);
  }

  up (&request_lock);

  /* Mark every consumed pending slot free for reuse. */
  for (j=0; j<n; j++)
  {
    if (!pending[j].bh)
      continue;

    pending[j].bh=NULL;
  }

  sti ();
}
550
551
/* Symbols exported to the personality modules (linear, raid0, raid1,
   raid5): the device tables, the request helpers, and the
   registration entry points. */
static struct symbol_table md_symbol_table=
{
#include <linux/symtab_begin.h>

  X(devices),
  X(md_size),
  X(add_request),
  X(make_md_request),

#ifdef CONFIG_MD_SUPPORT_RAID1
  X(support_for_raid1),
#endif

#ifdef CONFIG_MD_SUPPORT_RAID5
  X(support_for_raid5),
#endif

  X(register_md_personality),
  X(unregister_md_personality),
  X(partition_name),

#if defined(CONFIG_MD_SUPPORT_RAID1) || defined(CONFIG_MD_SUPPORT_RAID5)
  X(md_valid_device),
  X(md_can_reemit),
#endif

#include <linux/symtab_end.h>
};
580
581
/*
 * Called once by the gendisk machinery at boot: set per-device
 * defaults, export our symbol table and register /proc/mdstat
 * (whose contents are produced by get_md_status()).
 */
static void md_geninit (struct gendisk *gdisk)
{
  int i;

  for(i=0;i<MAX_MD_DEV;i++)
  {
    md_blocksizes[i] = 1024;            /* default block size in bytes */
    md_gendisk.part[i].start_sect=-1;   /* -1: skip the partition check */
    md_dev[i].pers=NULL;
#ifdef MD_COUNT_SIZES
    /* Reset the request-size statistics counters. */
    md_dev[i].smallest_count=md_dev[i].biggest_count=md_dev[i].equal_count=0;
#endif
  }

  blksize_size[MAJOR_NR] = md_blocksizes;
  register_symtab (&md_symbol_table);

  /* Register /proc/mdstat via an anonymous compound-literal entry;
     6 is strlen("mdstat"). */
  proc_register(&proc_root,
    &(struct proc_dir_entry)
    {
      PROC_MD, 6, "mdstat",
      S_IFREG | S_IRUGO, 1, 0, 0,
    });
}
606
607
/*
 * Produce the contents of /proc/mdstat into the supplied page and
 * return the number of bytes written.
 */
int get_md_status (char *page)
{
  int sz=0, i, j;

  sz+=sprintf( page+sz, "Personalities : ");
  for (i=0; i<MAX_PERSONALITY; i++)
    if (pers[i])
      sz+=sprintf (page+sz, "[%d %s] ", i, pers[i]->name);

  /* Turn the trailing space into the line's newline. */
  page[sz-1]='\n';

  sz+=sprintf (page+sz, "read_ahead ");
  if (read_ahead[MD_MAJOR]==INT_MAX)
    sz+=sprintf (page+sz, "not set\n");
  else
    sz+=sprintf (page+sz, "%d sectors\n", read_ahead[MD_MAJOR]);

  for (i=0; i<MAX_MD_DEV; i++)
  {
    sz+=sprintf (page+sz, "md%d : %sactive", i, md_dev[i].pers ? "" : "in");

    if (md_dev[i].pers)
      sz+=sprintf (page+sz, " %s", md_dev[i].pers->name);

    /* List the member devices; invalid ones appear in parentheses. */
    for (j=0; j<md_dev[i].nb_dev; j++)
      sz+=sprintf (page+sz, " %s%s%s",
                   (devices[i][j].invalid==VALID) ? "" : "(",
                   partition_name(devices[i][j].dev),
                   (devices[i][j].invalid==VALID) ? "" : ")");

    if (md_dev[i].nb_dev)
      sz+=sprintf (page+sz, " %d blocks", md_size[i]);

    if (!md_dev[i].pers)
    {
      sz+=sprintf (page+sz, "\n");
      continue;
    }

    if (md_dev[i].pers->max_invalid_dev)
      sz+=sprintf (page+sz, " maxfault=%ld", MAX_FAULT(md_dev+i));

    /* Linear arrays have no chunk size; all others report theirs,
       plus the size statistics when they were compiled in. */
    if (md_dev[i].pers != pers[(LINEAR>>PERSONALITY_SHIFT)])
    {
      sz+=sprintf (page+sz, " %dk chunks", 1<<FACTOR_SHIFT(FACTOR(md_dev+i)));
#ifdef MD_COUNT_SIZES
      sz+=sprintf (page+sz, " (%d/%d/%d)",
                   md_dev[i].smallest_count,
                   md_dev[i].equal_count,
                   md_dev[i].biggest_count);
#endif
    }
    sz+=sprintf (page+sz, "\n");
    sz+=md_dev[i].pers->status (page+sz, i, md_dev+i);
  }

  return (sz);
}
666
667 #if defined(CONFIG_MD_SUPPORT_RAID1) || defined(CONFIG_MD_SUPPORT_RAID5)
668
669 int md_valid_device (int minor, kdev_t dev, int mode)
670 {
671 int i;
672
673 for (i=0; i<md_dev[minor].nb_dev; i++)
674 if (devices[minor][i].dev==dev)
675 break;
676
677 if (i>md_dev[minor].nb_dev)
678 {
679 printk ("Oops, dev %04x not found in md_valid_device\n", dev);
680 return -EINVAL;
681 }
682
683 switch (mode)
684 {
685 case VALID:
686
687
688
689 if (devices[minor][i].invalid==INVALID_ALWAYS)
690 {
691 devices[minor][i].fault_count=0;
692 if (md_dev[minor].invalid_dev_count)
693 md_dev[minor].invalid_dev_count--;
694 }
695 break;
696
697 case INVALID:
698 if (devices[minor][i].invalid != VALID )
699 return 0;
700
701 if (++devices[minor][i].fault_count > MAX_FAULT(md_dev+minor) &&
702 MAX_FAULT(md_dev+minor)!=0xFF)
703 {
704
705
706
707 mode=INVALID_ALWAYS;
708 md_dev[minor].invalid_dev_count++;
709 }
710 else
711
712
713
714
715
716 if (md_dev[minor].invalid_dev_count<=md_dev[minor].pers->max_invalid_dev)
717 mode=INVALID_NEXT;
718 else
719 mode=VALID;
720 break;
721
722 case INVALID_ALWAYS:
723 md_dev[minor].invalid_dev_count++;
724 }
725
726 devices[minor][i].invalid=mode;
727 return 0;
728 }
729
730
731 int md_can_reemit (int minor)
732 {
733
734
735
736
737
738
739
740 if (!md_dev[minor].pers)
741 return (0);
742
743 return(md_dev[minor].pers->max_invalid_dev &&
744 ((md_dev[minor].pers->max_invalid_dev==-1) ?
745 1 :
746 md_dev[minor].invalid_dev_count<=md_dev[minor].pers->max_invalid_dev));
747 }
748
749 #endif
750
751 int register_md_personality (int p_num, struct md_personality *p)
752 {
753 int i=(p_num >> PERSONALITY_SHIFT);
754
755 if (i >= MAX_PERSONALITY)
756 return -EINVAL;
757
758 if (pers[i])
759 return -EBUSY;
760
761 pers[i]=p;
762 printk ("%s personality registered\n", p->name);
763 return 0;
764 }
765
766 int unregister_md_personality (int p_num)
767 {
768 int i=(p_num >> PERSONALITY_SHIFT);
769
770 if (i >= MAX_PERSONALITY)
771 return -EINVAL;
772
773 printk ("%s personality unregistered\n", pers[i]->name);
774 pers[i]=NULL;
775 return 0;
776 }
777
778 void linear_init (void);
779 void raid0_init (void);
780 void raid1_init (void);
781 void raid5_init (void);
782
/*
 * Driver initialization: grab the MD major number, install the
 * request handler, chain our gendisk into the global list, then let
 * each compiled-in personality register itself.  Returns 0 on
 * success, -1 when the major number is already taken.
 */
int md_init (void)
{
  printk ("md driver %s MAX_MD_DEV=%d, MAX_REAL=%d\n", MD_VERSION, MAX_MD_DEV, MAX_REAL);

  if (register_blkdev (MD_MAJOR, "md", &md_fops))
  {
    printk ("Unable to get major %d for md\n", MD_MAJOR);
    return (-1);
  }

  blk_dev[MD_MAJOR].request_fn=DEVICE_REQUEST;
  blk_dev[MD_MAJOR].current_request=NULL;
  read_ahead[MD_MAJOR]=INT_MAX;   /* "not set" until an array starts */
  md_gendisk.next=gendisk_head;

  gendisk_head=&md_gendisk;

  /* Personalities register themselves via register_md_personality. */
#ifdef CONFIG_MD_LINEAR
  linear_init ();
#endif
#ifdef CONFIG_MD_STRIPED
  raid0_init ();
#endif
#ifdef CONFIG_MD_RAID1
  raid1_init ();
#endif
#ifdef CONFIG_MD_RAID5
  raid5_init ();
#endif

  return (0);
}