root/kernel/sched.c


DEFINITIONS

This source file includes the following definitions.
  1. add_to_runqueue
  2. del_from_runqueue
  3. move_last_runqueue
  4. wake_up_process
  5. process_timeout
  6. goodness
  7. schedule
  8. sys_pause
  9. wake_up
  10. wake_up_interruptible
  11. __down
  12. __sleep_on
  13. interruptible_sleep_on
  14. sleep_on
  15. add_timer
  16. del_timer
  17. count_active_tasks
  18. calc_load
  19. second_overflow
  20. timer_bh
  21. tqueue_bh
  22. immediate_bh
  23. do_timer
  24. sys_alarm
  25. sys_getpid
  26. sys_getppid
  27. sys_getuid
  28. sys_geteuid
  29. sys_getgid
  30. sys_getegid
  31. sys_nice
  32. find_process_by_pid
  33. setscheduler
  34. sys_sched_setscheduler
  35. sys_sched_setparam
  36. sys_sched_getscheduler
  37. sys_sched_getparam
  38. sys_sched_yield
  39. sys_sched_get_priority_max
  40. sys_sched_get_priority_min
  41. sys_sched_rr_get_interval
  42. timespectojiffies
  43. jiffiestotimespec
  44. sys_nanosleep
  45. show_task
  46. show_state
  47. sched_init

   1 /*
   2  *  linux/kernel/sched.c
   3  *
   4  *  Copyright (C) 1991, 1992  Linus Torvalds
   5  */
   6 
   7 /*
   8  * 'sched.c' is the main kernel file. It contains scheduling primitives
   9  * (sleep_on, wakeup, schedule etc) as well as a number of simple system
  10  * call functions (like getpid(), which just extracts a field from
  11  * the current task).
  12  */
  13 
  14 #include <linux/signal.h>
  15 #include <linux/sched.h>
  16 #include <linux/timer.h>
  17 #include <linux/kernel.h>
  18 #include <linux/kernel_stat.h>
  19 #include <linux/fdreg.h>
  20 #include <linux/errno.h>
  21 #include <linux/time.h>
  22 #include <linux/ptrace.h>
  23 #include <linux/delay.h>
  24 #include <linux/interrupt.h>
  25 #include <linux/tqueue.h>
  26 #include <linux/resource.h>
  27 #include <linux/mm.h>
  28 #include <linux/smp.h>
  29 
  30 #include <asm/system.h>
  31 #include <asm/io.h>
  32 #include <asm/segment.h>
  33 #include <asm/pgtable.h>
  34 #include <asm/mmu_context.h>
  35 
  36 #include <linux/timex.h>
  37 
  38 /*
  39  * kernel variables
  40  */
  41 
  42 int securelevel = 0;                    /* system security level */
  43 
  44 long tick = 1000000 / HZ;               /* timer interrupt period */
  45 volatile struct timeval xtime;          /* The current time */
  46 int tickadj = 500/HZ;                   /* microsecs */
  47 
  48 DECLARE_TASK_QUEUE(tq_timer);
  49 DECLARE_TASK_QUEUE(tq_immediate);
  50 DECLARE_TASK_QUEUE(tq_scheduler);
  51 
  52 /*
  53  * phase-lock loop variables
  54  */
  55 int time_state = TIME_BAD;     /* clock synchronization status */
  56 int time_status = STA_UNSYNC | STA_PLL; /* clock status bits */
  57 long time_offset = 0;           /* time adjustment (us) */
  58 long time_constant = 2;         /* pll time constant */
  59 long time_tolerance = MAXFREQ;  /* frequency tolerance (ppm) */
  60 long time_precision = 1;        /* clock precision (us) */
  61 long time_maxerror = 0x70000000;/* maximum error */
  62 long time_esterror = 0x70000000;/* estimated error */
  63 long time_phase = 0;            /* phase offset (scaled us) */
  64 long time_freq = 0;             /* frequency offset (scaled ppm) */
  65 long time_adj = 0;              /* tick adjust (scaled 1 / HZ) */
  66 long time_reftime = 0;          /* time at last adjustment (s) */
  67 
  68 long time_adjust = 0;
  69 long time_adjust_step = 0;
  70 
  71 int need_resched = 0;
  72 unsigned long event = 0;
  73 
  74 extern int _setitimer(int, struct itimerval *, struct itimerval *);
  75 unsigned int * prof_buffer = NULL;
  76 unsigned long prof_len = 0;
  77 unsigned long prof_shift = 0;
  78 
  79 #define _S(nr) (1<<((nr)-1))
  80 
  81 extern void mem_use(void);
  82 
  83 static unsigned long init_kernel_stack[1024] = { STACK_MAGIC, };
  84 unsigned long init_user_stack[1024] = { STACK_MAGIC, };
  85 static struct vm_area_struct init_mmap = INIT_MMAP;
  86 static struct fs_struct init_fs = INIT_FS;
  87 static struct files_struct init_files = INIT_FILES;
  88 static struct signal_struct init_signals = INIT_SIGNALS;
  89 
  90 struct mm_struct init_mm = INIT_MM;
  91 struct task_struct init_task = INIT_TASK;
  92 
  93 unsigned long volatile jiffies=0;
  94 
  95 struct task_struct *current_set[NR_CPUS];
  96 struct task_struct *last_task_used_math = NULL;
  97 
  98 struct task_struct * task[NR_TASKS] = {&init_task, };
  99 
 100 struct kernel_stat kstat = { 0 };
 101 
 102 static inline void add_to_runqueue(struct task_struct * p)
 103 {
 104 #if 1   /* sanity tests */
 105         if (p->next_run || p->prev_run) {
 106                 printk("task already on run-queue\n");
 107                 return;
 108         }
 109 #endif
 110         if (p->counter > current->counter + 3)
 111                 need_resched = 1;
 112         nr_running++;
 113         (p->prev_run = init_task.prev_run)->next_run = p;
 114         p->next_run = &init_task;
 115         init_task.prev_run = p;
 116 }
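
/*
 * An illustrative unrolling of the condensed insertion above: the
 * run-queue is a circular, doubly linked list threaded through the
 * next_run/prev_run pointers, with init_task (the idle task) doubling
 * as the list head.
 *
 *	p->prev_run = init_task.prev_run;	(link after the old tail)
 *	p->prev_run->next_run = p;
 *	p->next_run = &init_task;		(wrap back to the head)
 *	init_task.prev_run = p;
 *
 * A woken task therefore always enters at the tail, just in front of
 * init_task, and schedule() scans from init_task.next_run.
 */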
 117 
 118 static inline void del_from_runqueue(struct task_struct * p)
 119 {
 120         struct task_struct *next = p->next_run;
 121         struct task_struct *prev = p->prev_run;
 122 
 123 #if 1   /* sanity tests */
 124         if (!next || !prev) {
 125                 printk("task not on run-queue\n");
 126                 return;
 127         }
 128 #endif
 129         if (p == &init_task) {
 130                 static int nr = 0;
 131                 if (nr < 5) {
 132                         nr++;
 133                         printk("idle task may not sleep\n");
 134                 }
 135                 return;
 136         }
 137         nr_running--;
 138         next->prev_run = prev;
 139         prev->next_run = next;
 140         p->next_run = NULL;
 141         p->prev_run = NULL;
 142 }
 143 
 144 static inline void move_last_runqueue(struct task_struct * p)
 145 {
 146         struct task_struct *next = p->next_run;
 147         struct task_struct *prev = p->prev_run;
 148 
 149         next->prev_run = prev;
 150         prev->next_run = next;
 151         (p->prev_run = init_task.prev_run)->next_run = p;
 152         p->next_run = &init_task;
 153         init_task.prev_run = p;
 154 }
 155 
 156 /*
 157  * Wake up a process. Put it on the run-queue if it's not
 158  * already there.  The "current" process is always on the
 159  * run-queue (except when the actual re-schedule is in
 160  * progress), and as such you're allowed to do the simpler
 161  * "current->state = TASK_RUNNING" to mark yourself runnable
 162  * without the overhead of this.
 163  */
 164 inline void wake_up_process(struct task_struct * p)
 165 {
 166         unsigned long flags;
 167 
 168         save_flags(flags);
 169         cli();
 170         p->state = TASK_RUNNING;
 171         if (!p->next_run)
 172                 add_to_runqueue(p);
 173         restore_flags(flags);
 174 }
 175 
 176 static void process_timeout(unsigned long __data)
 177 {
 178         struct task_struct * p = (struct task_struct *) __data;
 179 
 180         p->timeout = 0;
 181         wake_up_process(p);
 182 }
 183 
 184 /*
 185  * This is the function that decides how desirable a process is..
 186  * You can weigh different processes against each other depending
 187  * on what CPU they've run on lately etc to try to handle cache
 188  * and TLB miss penalties.
 189  *
 190  * Return values:
 191  *       -1000: never select this
 192  *           0: out of time, recalculate counters (but it might still be
 193  *              selected)
 194  *         +ve: "goodness" value (the larger, the better)
 195  *       +1000: realtime process, select this.
 196  */
 197 static inline int goodness(struct task_struct * p, struct task_struct * prev, int this_cpu)
 198 {
 199         int weight;
 200 
 201 #ifdef __SMP__  
 202         /* We are not permitted to run a task someone else is running */
 203         if (p->processor != NO_PROC_ID)
 204                 return -1000;
 205 #endif
 206 
 207         /*
 208          * Realtime process, select the first one on the
 209          * runqueue (taking priorities within processes
 210          * into account).
 211          */
 212         if (p->policy != SCHED_OTHER)
 213                 return 1000 + p->rt_priority;
 214 
 215         /*
 216          * Give the process a first-approximation goodness value
 217          * according to the number of clock-ticks it has left.
 218          *
 219          * Don't do any other calculations if the time slice is
 220          * over..
 221          */
 222         weight = p->counter;
 223         if (weight) {
 224                         
 225 #ifdef __SMP__
 226                 /* Give a largish advantage to the same processor...   */
 227                 /* (this is equivalent to penalizing other processors) */
 228                 if (p->last_processor == this_cpu)
 229                         weight += PROC_CHANGE_PENALTY;
 230 #endif
 231 
 232                 /* .. and a slight advantage to the current process */
 233                 if (p == prev)
 234                         weight += 1;
 235         }
 236 
 237         return weight;
 238 }
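
/*
 * Worked example (illustrative values; with HZ == 100, DEF_PRIORITY is
 * 20): a SCHED_OTHER task with a full time slice scores 20, or 21 if
 * it is also the previously running task; under SMP a task that last
 * ran on this CPU collects PROC_CHANGE_PENALTY on top.  A real-time
 * task, e.g. SCHED_FIFO with rt_priority 50, scores 1050 and beats any
 * SCHED_OTHER task no matter how much of its slice remains.
 */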
 239 
 240 /*
 241  *  'schedule()' is the scheduler function. It's a very simple and nice
 242  * scheduler: it's not perfect, but certainly works for most things.
 243  *
 244  * The goto is "interesting".
 245  *
 246  *   NOTE!!  Task 0 is the 'idle' task, which gets called when no other
 247  * tasks can run. It can not be killed, and it cannot sleep. The 'state'
 248  * information in task[0] is never used.
 249  */
 250 asmlinkage void schedule(void)
 251 {
 252         int c;
 253         struct task_struct * p;
 254         struct task_struct * prev, * next;
 255         unsigned long timeout = 0;
 256         int this_cpu=smp_processor_id();
 257 
 258 /* check alarm, wake up any interruptible tasks that have got a signal */
 259 
 260         if (intr_count) {
 261                 printk("Aiee: scheduling in interrupt\n");
 262                 return;
 263         }
 264         if (bh_active & bh_mask) {
 265                 intr_count = 1;
 266                 do_bottom_half();
 267                 intr_count = 0;
 268         }
 269         run_task_queue(&tq_scheduler);
 270 
 271         need_resched = 0;
 272         prev = current;
 273         cli();
 274         /* move an exhausted RR process to be last.. */
 275         if (!prev->counter && prev->policy == SCHED_RR) {
 276                 prev->counter = prev->priority;
 277                 move_last_runqueue(prev);
 278         }
 279         switch (prev->state) {
 280                 case TASK_INTERRUPTIBLE:
 281                         if (prev->signal & ~prev->blocked)
 282                                 goto makerunnable;
 283                         timeout = prev->timeout;
 284                         if (timeout && (timeout <= jiffies)) {
 285                                 prev->timeout = 0;
 286                                 timeout = 0;
 287                 makerunnable:
 288                                 prev->state = TASK_RUNNING;
 289                                 break;
 290                         }
 291                 default:
 292                         del_from_runqueue(prev);
 293                 case TASK_RUNNING:      /* stays on the run-queue */
 294         }
 295         p = init_task.next_run;
 296         sti();
 297         
 298 #ifdef __SMP__
 299         /*
 300          *      This is safe as we do not permit re-entry of schedule()
 301          */
 302         prev->processor = NO_PROC_ID;   
 303 #endif  
 304 
 305 /*
 306  * Note! there may appear new tasks on the run-queue during this, as
 307  * interrupts are enabled. However, they will be added at the end of the
 308  * list (just in front of init_task), so our scan starting at "p" stays safe.
 309  */
 310 /* this is the scheduler proper: */
 311         c = -1000;
 312         next = &init_task;
 313         while (p != &init_task) {
 314                 int weight = goodness(p, prev, this_cpu);
 315                 if (weight > c)
 316                         c = weight, next = p;
 317                 p = p->next_run;
 318         }
 319 
 320         /* if all runnable processes have "counter == 0", re-calculate counters */
 321         if (!c) {
 322                 for_each_task(p)
 323                         p->counter = (p->counter >> 1) + p->priority;
 324         }
 325 #ifdef __SMP__  
 326         
 327         /*
 328          *      Context switching between two idle threads is pointless.
 329          */
 330         if(!prev->pid && !next->pid)
 331                 next=prev;
 332         /*
 333          *      Allocate process to CPU
 334          */
 335          
 336          next->processor = this_cpu;
 337          next->last_processor = this_cpu;
 338          
 339 #endif   
 340 #ifdef __SMP_PROF__ 
 341         /* mark processor running an idle thread */
 342         if (0==next->pid)
 343                 set_bit(this_cpu,&smp_idle_map);
 344         else
 345                 clear_bit(this_cpu,&smp_idle_map);
 346 #endif
 347         if (prev != next) {
 348                 struct timer_list timer;
 349 
 350                 kstat.context_swtch++;
 351                 if (timeout) {
 352                         init_timer(&timer);
 353                         timer.expires = timeout;
 354                         timer.data = (unsigned long) prev;
 355                         timer.function = process_timeout;
 356                         add_timer(&timer);
 357                 }
 358                 get_mmu_context(next);
 359                 switch_to(prev,next);
 360                 if (timeout)
 361                         del_timer(&timer);
 362         }
 363 }
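
/*
 * Sketch of the timeout path above (the wait queue "wq" is a
 * hypothetical example): a task arranges a bounded sleep by setting
 * current->timeout before sleeping,
 *
 *	current->timeout = jiffies + HZ;	(give up after ~1 second)
 *	interruptible_sleep_on(&wq);
 *
 * schedule() then arms a one-shot timer_list whose handler,
 * process_timeout(), clears p->timeout and calls wake_up_process() if
 * nothing else wakes the task first.
 */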
 364 
 365 #ifndef __alpha__
 366 
 367 /*
 368  * For backwards compatibility?  This can be done in libc so Alpha
 369  * and all newer ports shouldn't need it.
 370  */
 371 asmlinkage int sys_pause(void)
 372 {
 373         current->state = TASK_INTERRUPTIBLE;
 374         schedule();
 375         return -ERESTARTNOHAND;
 376 }
 377 
 378 #endif
 379 
 380 /*
 381  * wake_up doesn't wake up stopped processes - they have to be awakened
 382  * with signals or similar.
 383  *
 384  * Note that this doesn't need cli-sti pairs: interrupts may not change
 385  * the wait-queue structures directly, but only call wake_up() to wake
 386  * a process. The process itself must remove the queue once it has woken.
 387  */
 388 void wake_up(struct wait_queue **q)
 389 {
 390         struct wait_queue *tmp;
 391         struct task_struct * p;
 392 
 393         if (!q || !(tmp = *q))
 394                 return;
 395         do {
 396                 if ((p = tmp->task) != NULL) {
 397                         if ((p->state == TASK_UNINTERRUPTIBLE) ||
 398                             (p->state == TASK_INTERRUPTIBLE))
 399                                 wake_up_process(p);
 400                 }
 401                 if (!tmp->next) {
 402                         printk("wait_queue is bad (eip = %p)\n",
 403                                 __builtin_return_address(0));
 404                         printk("        q = %p\n",q);
 405                         printk("       *q = %p\n",*q);
 406                         printk("      tmp = %p\n",tmp);
 407                         break;
 408                 }
 409                 tmp = tmp->next;
 410         } while (tmp != *q);
 411 }
 412 
 413 void wake_up_interruptible(struct wait_queue **q)
 414 {
 415         struct wait_queue *tmp;
 416         struct task_struct * p;
 417 
 418         if (!q || !(tmp = *q))
 419                 return;
 420         do {
 421                 if ((p = tmp->task) != NULL) {
 422                         if (p->state == TASK_INTERRUPTIBLE)
 423                                 wake_up_process(p);
 424                 }
 425                 if (!tmp->next) {
 426                         printk("wait_queue is bad (eip = %p)\n",
 427                                 __builtin_return_address(0));
 428                         printk("        q = %p\n",q);
 429                         printk("       *q = %p\n",*q);
 430                         printk("      tmp = %p\n",tmp);
 431                         break;
 432                 }
 433                 tmp = tmp->next;
 434         } while (tmp != *q);
 435 }
 436 
 437 void __down(struct semaphore * sem)
 438 {
 439         struct wait_queue wait = { current, NULL };
 440         add_wait_queue(&sem->wait, &wait);
 441         current->state = TASK_UNINTERRUPTIBLE;
 442         while (sem->count <= 0) {
 443                 schedule();
 444                 current->state = TASK_UNINTERRUPTIBLE;
 445         }
 446         current->state = TASK_RUNNING;
 447         remove_wait_queue(&sem->wait, &wait);
 448 }
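
/*
 * Usage sketch: __down() is only the contended slow path; callers use
 * the down()/up() pair from <asm/semaphore.h>, e.g. for mutual
 * exclusion (assuming the usual MUTEX initializer with count == 1):
 *
 *	static struct semaphore lock = MUTEX;
 *
 *	down(&lock);	(sleeps in TASK_UNINTERRUPTIBLE if contended)
 *	... critical section ...
 *	up(&lock);	(raises the count and wakes up sem->wait)
 */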
 449 
 450 static inline void __sleep_on(struct wait_queue **p, int state)
 451 {
 452         unsigned long flags;
 453         struct wait_queue wait = { current, NULL };
 454 
 455         if (!p)
 456                 return;
 457         if (current == task[0])
 458                 panic("task[0] trying to sleep");
 459         current->state = state;
 460         add_wait_queue(p, &wait);
 461         save_flags(flags);
 462         sti();
 463         schedule();
 464         remove_wait_queue(p, &wait);
 465         restore_flags(flags);
 466 }
 467 
 468 void interruptible_sleep_on(struct wait_queue **p)
 469 {
 470         __sleep_on(p,TASK_INTERRUPTIBLE);
 471 }
 472 
 473 void sleep_on(struct wait_queue **p)
 474 {
 475         __sleep_on(p,TASK_UNINTERRUPTIBLE);
 476 }
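
/*
 * The classic pattern built on these primitives (the names are
 * illustrative):
 *
 *	static struct wait_queue * data_wait = NULL;
 *
 *	consumer:	while (!data_ready)
 *				interruptible_sleep_on(&data_wait);
 *	producer:	data_ready = 1;
 *			wake_up_interruptible(&data_wait);
 *
 * The condition must be re-tested after waking: wake_up() wakes every
 * task on the queue, and another consumer may have run first.
 */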
 477 
 478 /*
 479  * The head for the timer-list has an "expires" field of MAX_UINT,
 480  * and the sorting routine counts on this..
 481  */
 482 static struct timer_list timer_head = { &timer_head, &timer_head, ~0, 0, NULL };
 483 #define SLOW_BUT_DEBUGGING_TIMERS 0
 484 
 485 void add_timer(struct timer_list * timer)
 486 {
 487         unsigned long flags;
 488         struct timer_list *p;
 489 
 490 #if SLOW_BUT_DEBUGGING_TIMERS
 491         if (timer->next || timer->prev) {
 492                 printk("add_timer() called with non-zero list from %p\n",
 493                         __builtin_return_address(0));
 494                 return;
 495         }
 496 #endif
 497         p = &timer_head;
 498         save_flags(flags);
 499         cli();
 500         do {
 501                 p = p->next;
 502         } while (timer->expires > p->expires);
 503         timer->next = p;
 504         timer->prev = p->prev;
 505         p->prev = timer;
 506         timer->prev->next = timer;
 507         restore_flags(flags);
 508 }
 509 
 510 int del_timer(struct timer_list * timer)
 511 {
 512         unsigned long flags;
 513 #if SLOW_BUT_DEBUGGING_TIMERS
 514         struct timer_list * p;
 515 
 516         p = &timer_head;
 517         save_flags(flags);
 518         cli();
 519         while ((p = p->next) != &timer_head) {
 520                 if (p == timer) {
 521                         timer->next->prev = timer->prev;
 522                         timer->prev->next = timer->next;
 523                         timer->next = timer->prev = NULL;
 524                         restore_flags(flags);
 525                         return 1;
 526                 }
 527         }
 528         if (timer->next || timer->prev)
 529                 printk("del_timer() called from %p with timer not initialized\n",
 530                         __builtin_return_address(0));
 531         restore_flags(flags);
 532         return 0;
 533 #else
 534         struct timer_list * next;
 535         int ret = 0;
 536         save_flags(flags);
 537         cli();
 538         if ((next = timer->next) != NULL) {
 539                 (next->prev = timer->prev)->next = next;
 540                 timer->next = timer->prev = NULL;
 541                 ret = 1;
 542         }
 543         restore_flags(flags);
 544         return ret;
 545 #endif
 546 }
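
/*
 * Illustrative sketch of how a driver arms a one-shot timer with the
 * two primitives above (the example_* names are hypothetical):
 */
#if 0
static struct timer_list example_timer;

static void example_timeout(unsigned long data)
{
	/* runs from timer_bh() shortly after the expiry time */
	printk("example timer fired, data=%lu\n", data);
}

static void example_arm_timer(void)
{
	init_timer(&example_timer);		/* clear the link fields */
	example_timer.expires = jiffies + 2*HZ;	/* roughly two seconds */
	example_timer.data = 42;
	example_timer.function = example_timeout;
	add_timer(&example_timer);		/* sorted insertion */
}
#endif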
 547 
 548 unsigned long timer_active = 0;
 549 struct timer_struct timer_table[32];
 550 
 551 /*
 552  * Hmm.. Changed this, as the GNU make sources (load.c) seem to
 553  * imply that avenrun[] is the standard name for this kind of thing.
 554  * Nothing else seems to be standardized: the fractional size etc
 555  * all seem to differ on different machines.
 556  */
 557 unsigned long avenrun[3] = { 0,0,0 };
 558 
 559 /*
 560  * Nr of active tasks - counted in fixed-point numbers
 561  */
 562 static unsigned long count_active_tasks(void)
 563 {
 564         struct task_struct **p;
 565         unsigned long nr = 0;
 566 
 567         for(p = &LAST_TASK; p > &FIRST_TASK; --p)
 568                 if (*p && ((*p)->state == TASK_RUNNING ||
 569                            (*p)->state == TASK_UNINTERRUPTIBLE ||
 570                            (*p)->state == TASK_SWAPPING))
 571                         nr += FIXED_1;
 572 #ifdef __SMP__
 573         nr-=(smp_num_cpus-1)*FIXED_1;
 574 #endif                  
 575         return nr;
 576 }
 577 
 578 static inline void calc_load(void)
 579 {
 580         unsigned long active_tasks; /* fixed-point */
 581         static int count = LOAD_FREQ;
 582 
 583         if (count-- > 0)
 584                 return;
 585         count = LOAD_FREQ;
 586         active_tasks = count_active_tasks();
 587         CALC_LOAD(avenrun[0], EXP_1, active_tasks);
 588         CALC_LOAD(avenrun[1], EXP_5, active_tasks);
 589         CALC_LOAD(avenrun[2], EXP_15, active_tasks);
 590 }
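
/*
 * The load average is kept in fixed point (FIXED_1 == 2048, i.e. 11
 * fraction bits, per <linux/sched.h>) and re-sampled every LOAD_FREQ
 * ticks.  CALC_LOAD performs the exponential decay
 *
 *	load = (load * exp + n * (FIXED_1 - exp)) >> FSHIFT
 *
 * so, as a rough illustration, with two tasks permanently runnable
 * avenrun[0] converges on 2 * FIXED_1 == 4096, which /proc/loadavg
 * reports as 2.00.
 */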
 591 
 592 /*
 593  * this routine handles the overflow of the microsecond field
 594  *
 595  * The tricky bits of code to handle the accurate clock support
 596  * were provided by Dave Mills (Mills@UDEL.EDU) of NTP fame.
 597  * They were originally developed for SUN and DEC kernels.
 598  * All the kudos should go to Dave for this stuff.
 599  *
 600  */
 601 static void second_overflow(void)
 602 {
 603     long ltemp;
 604 
 605     /* Bump the maxerror field */
 606     time_maxerror = (0x70000000-time_maxerror <
 607                      time_tolerance >> SHIFT_USEC) ?
 608         0x70000000 : (time_maxerror + (time_tolerance >> SHIFT_USEC));
 609 
 610     /*
 611      * Leap second processing. If in leap-insert state at
 612      * the end of the day, the system clock is set back one
 613      * second; if in leap-delete state, the system clock is
 614      * set ahead one second. The microtime() routine or
 615  * external clock driver will ensure that reported time
 616      * is always monotonic. The ugly divides should be
 617      * replaced.
 618      */
 619     switch (time_state) {
 620 
 621     case TIME_OK:
 622         if (time_status & STA_INS)
 623             time_state = TIME_INS;
 624         else if (time_status & STA_DEL)
 625             time_state = TIME_DEL;
 626         break;
 627 
 628     case TIME_INS:
 629         if (xtime.tv_sec % 86400 == 0) {
 630             xtime.tv_sec--;
 631             time_state = TIME_OOP;
 632             printk("Clock: inserting leap second 23:59:60 UTC\n");
 633         }
 634         break;
 635 
 636     case TIME_DEL:
 637         if ((xtime.tv_sec + 1) % 86400 == 0) {
 638             xtime.tv_sec++;
 639             time_state = TIME_WAIT;
 640             printk("Clock: deleting leap second 23:59:59 UTC\n");
 641         }
 642         break;
 643 
 644     case TIME_OOP:
 645 
 646         time_state = TIME_WAIT;
 647         break;
 648 
 649     case TIME_WAIT:
 650         if (!(time_status & (STA_INS | STA_DEL)))
 651             time_state = TIME_OK;
 652     }
 653 
 654     /*
 655      * Compute the phase adjustment for the next second. In
 656      * PLL mode, the offset is reduced by a fixed factor
 657      * times the time constant. In FLL mode the offset is
 658      * used directly. In either mode, the maximum phase
 659      * adjustment for each second is clamped so as to spread
 660      * the adjustment over not more than the number of
 661      * seconds between updates.
 662      */
 663     if (time_offset < 0) {
 664         ltemp = -time_offset;
 665         if (!(time_status & STA_FLL))
 666             ltemp >>= SHIFT_KG + time_constant;
 667         if (ltemp > (MAXPHASE / MINSEC) << SHIFT_UPDATE)
 668             ltemp = (MAXPHASE / MINSEC) <<
 669                 SHIFT_UPDATE;
 670         time_offset += ltemp;
 671         time_adj = -ltemp << (SHIFT_SCALE - SHIFT_HZ -
 672                               SHIFT_UPDATE);
 673     } else {
 674         ltemp = time_offset;
 675         if (!(time_status & STA_FLL))
 676             ltemp >>= SHIFT_KG + time_constant;
 677         if (ltemp > (MAXPHASE / MINSEC) << SHIFT_UPDATE)
 678             ltemp = (MAXPHASE / MINSEC) <<
 679                 SHIFT_UPDATE;
 680         time_offset -= ltemp;
 681         time_adj = ltemp << (SHIFT_SCALE - SHIFT_HZ -
 682                              SHIFT_UPDATE);
 683     }
 684 
 685     /*
 686      * Compute the frequency estimate and additional phase
 687      * adjustment due to frequency error for the next
 688      * second. When the PPS signal is engaged, gnaw on the
 689      * watchdog counter and update the frequency computed by
 690      * the pll and the PPS signal.
 691      */
 692     pps_valid++;
 693     if (pps_valid == PPS_VALID) {
 694         pps_jitter = MAXTIME;
 695         pps_stabil = MAXFREQ;
 696         time_status &= ~(STA_PPSSIGNAL | STA_PPSJITTER |
 697                          STA_PPSWANDER | STA_PPSERROR);
 698     }
 699     ltemp = time_freq + pps_freq;
 700     if (ltemp < 0)
 701         time_adj -= -ltemp >>
 702             (SHIFT_USEC + SHIFT_HZ - SHIFT_SCALE);
 703     else
 704         time_adj += ltemp >>
 705             (SHIFT_USEC + SHIFT_HZ - SHIFT_SCALE);
 706 
 707 #if HZ == 100
 708     /* compensate for (HZ==100) != 128. Add 25% to get 125; => only 3% error */
 709     if (time_adj < 0)
 710         time_adj -= -time_adj >> 2;
 711     else
 712         time_adj += time_adj >> 2;
 713 #endif
 714 }
 715 
 716 /*
 717  * disregard lost ticks for now.. We don't care enough.
 718  */
 719 static void timer_bh(void)
 720 {
 721         unsigned long mask;
 722         struct timer_struct *tp;
 723         struct timer_list * timer;
 724 
 725         cli();
 726         while ((timer = timer_head.next) != &timer_head && timer->expires <= jiffies) {
 727                 void (*fn)(unsigned long) = timer->function;
 728                 unsigned long data = timer->data;
 729                 timer->next->prev = timer->prev;
 730                 timer->prev->next = timer->next;
 731                 timer->next = timer->prev = NULL;
 732                 sti();
 733                 fn(data);
 734                 cli();
 735         }
 736         sti();
 737         
 738         for (mask = 1, tp = timer_table+0 ; mask ; tp++,mask += mask) {
 739                 if (mask > timer_active)
 740                         break;
 741                 if (!(mask & timer_active))
 742                         continue;
 743                 if (tp->expires > jiffies)
 744                         continue;
 745                 timer_active &= ~mask;
 746                 tp->fn();
 747                 sti();
 748         }
 749 }
 750 
 751 void tqueue_bh(void)
 752 {
 753         run_task_queue(&tq_timer);
 754 }
 755 
 756 void immediate_bh(void)
 757 {
 758         run_task_queue(&tq_immediate);
 759 }
 760 
 761 void do_timer(struct pt_regs * regs)
 762 {
 763         unsigned long mask;
 764         struct timer_struct *tp;
 765         long ltemp, psecs;
 766 #ifdef  __SMP_PROF__
 767         int cpu,i;
 768 #endif
 769 
 770         /* Advance the phase; once it accumulates to a whole (scaled)
 771          * microsecond, fold it into this tick's usec update.
 772          */
 773         time_phase += time_adj;
 774         if (time_phase <= -FINEUSEC) {
 775                 ltemp = -time_phase >> SHIFT_SCALE;
 776                 time_phase += ltemp << SHIFT_SCALE;
 777                 xtime.tv_usec += tick + time_adjust_step - ltemp;
 778         }
 779         else if (time_phase >= FINEUSEC) {
 780                 ltemp = time_phase >> SHIFT_SCALE;
 781                 time_phase -= ltemp << SHIFT_SCALE;
 782                 xtime.tv_usec += tick + time_adjust_step + ltemp;
 783         } else
 784                 xtime.tv_usec += tick + time_adjust_step;
 785 
 786         if (time_adjust) {
 787             /* We are doing an adjtime thing. 
 788              *
 789              * Modify the value of the tick for next time.
 790              * Note that a positive delta means we want the clock
 791              * to run fast. This means that the tick should be bigger.
 792              *
 793              * Limit the amount of the step for *next* tick to be
 794              * in the range -tickadj .. +tickadj
 795              */
 796              if (time_adjust > tickadj)
 797                time_adjust_step = tickadj;
 798              else if (time_adjust < -tickadj)
 799                time_adjust_step = -tickadj;
 800              else
 801                time_adjust_step = time_adjust;
 802              
 803             /* Reduce by this step the amount of time left  */
 804             time_adjust -= time_adjust_step;
 805         }
 806         else
 807             time_adjust_step = 0;
 808 
 809         if (xtime.tv_usec >= 1000000) {
 810             xtime.tv_usec -= 1000000;
 811             xtime.tv_sec++;
 812             second_overflow();
 813         }
 814 
 815         jiffies++;
 816         calc_load();
 817 #ifdef  __SMP_PROF__
 818         smp_idle_count[NR_CPUS]++;    /* count timer ticks */
 819         cpu = smp_processor_id();
 820         for (i=0;i<(0==smp_num_cpus?1:smp_num_cpus);i++) 
 821                 if (test_bit(i,&smp_idle_map)) smp_idle_count[i]++;
 822 #endif
 823         if (user_mode(regs)) {
 824                 current->utime++;
 825                 if (current->pid) {
 826                         if (current->priority < DEF_PRIORITY)
 827                                 kstat.cpu_nice++;
 828                         else
 829                                 kstat.cpu_user++;
 830                 }
 831                 /* Update ITIMER_VIRT for current task if not in a system call */
 832                 if (current->it_virt_value && !(--current->it_virt_value)) {
 833                         current->it_virt_value = current->it_virt_incr;
 834                         send_sig(SIGVTALRM,current,1);
 835                 }
 836         } else {
 837                 current->stime++;
 838                 if(current->pid)
 839                         kstat.cpu_system++;
 840                 if (prof_buffer && current->pid) {
 841                         extern int _stext;
 842                         unsigned long ip = instruction_pointer(regs);
 843                         ip -= (unsigned long) &_stext;
 844                         ip >>= prof_shift;
 845                         if (ip < prof_len)
 846                                 prof_buffer[ip]++;
 847                 }
 848         }
 849         /*
 850          * check the cpu time limit on the process.
 851          */
 852         if ((current->rlim[RLIMIT_CPU].rlim_max != RLIM_INFINITY) &&
 853             (((current->stime + current->utime) / HZ) >= current->rlim[RLIMIT_CPU].rlim_max))
 854                 send_sig(SIGKILL, current, 1);
 855         if ((current->rlim[RLIMIT_CPU].rlim_cur != RLIM_INFINITY) &&
 856             (((current->stime + current->utime) % HZ) == 0)) {
 857                 psecs = (current->stime + current->utime) / HZ;
 858                 /* send when equal */
 859                 if (psecs == current->rlim[RLIMIT_CPU].rlim_cur)
 860                         send_sig(SIGXCPU, current, 1);
 861                 /* and every five seconds thereafter. */
 862                 else if ((psecs > current->rlim[RLIMIT_CPU].rlim_cur) &&
 863                         ((psecs - current->rlim[RLIMIT_CPU].rlim_cur) % 5) == 0)
 864                         send_sig(SIGXCPU, current, 1);
 865         }
 866 
 867         if (current->pid && 0 > --current->counter) {
 868                 current->counter = 0;
 869                 need_resched = 1;
 870         }
 871         /* Update ITIMER_PROF for the current task */
 872         if (current->it_prof_value && !(--current->it_prof_value)) {
 873                 current->it_prof_value = current->it_prof_incr;
 874                 send_sig(SIGPROF,current,1);
 875         }
 876         for (mask = 1, tp = timer_table+0 ; mask ; tp++,mask += mask) {
 877                 if (mask > timer_active)
 878                         break;
 879                 if (!(mask & timer_active))
 880                         continue;
 881                 if (tp->expires > jiffies)
 882                         continue;
 883                 mark_bh(TIMER_BH);
 884         }
 885         if (timer_head.next->expires <= jiffies)
 886                 mark_bh(TIMER_BH);
 887         if (tq_timer != &tq_last)
 888                 mark_bh(TQUEUE_BH);
 889 }
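
/*
 * Deferral sketch: do_timer() runs in the timer interrupt itself, so
 * it only updates the accounting fields and marks bottom halves; the
 * expired timer_list entries and timer_table slots are run later, with
 * interrupts enabled, by timer_bh().  A driver defers work to the next
 * tick like this (my_handler/my_task are hypothetical):
 *
 *	static void my_handler(void *data);
 *	static struct tq_struct my_task = { NULL, 0, my_handler, NULL };
 *
 *	queue_task(&my_task, &tq_timer);	(run later by tqueue_bh)
 */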
 890 
 891 #ifndef __alpha__
 892 
 893 /*
 894  * For backwards compatibility?  This can be done in libc so Alpha
 895  * and all newer ports shouldn't need it.
 896  */
 897 asmlinkage unsigned int sys_alarm(unsigned int seconds)
 898 {
 899         struct itimerval it_new, it_old;
 900         unsigned int oldalarm;
 901 
 902         it_new.it_interval.tv_sec = it_new.it_interval.tv_usec = 0;
 903         it_new.it_value.tv_sec = seconds;
 904         it_new.it_value.tv_usec = 0;
 905         _setitimer(ITIMER_REAL, &it_new, &it_old);
 906         oldalarm = it_old.it_value.tv_sec;
 907         /* ehhh.. We can't return 0 if we have an alarm pending.. */
 908         /* And it's better to return too much than too little anyway */
 909         if (it_old.it_value.tv_usec)
 910                 oldalarm++;
 911         return oldalarm;
 912 }
 913 
 914 /*
 915  * The Alpha uses getxpid, getxuid, and getxgid instead.  Maybe this
 916  * should be moved into arch/i386 instead?
 917  */
 918 asmlinkage int sys_getpid(void)
 919 {
 920         return current->pid;
 921 }
 922 
 923 asmlinkage int sys_getppid(void)
 924 {
 925         return current->p_opptr->pid;
 926 }
 927 
 928 asmlinkage int sys_getuid(void)
 929 {
 930         return current->uid;
 931 }
 932 
 933 asmlinkage int sys_geteuid(void)
 934 {
 935         return current->euid;
 936 }
 937 
 938 asmlinkage int sys_getgid(void)
 939 {
 940         return current->gid;
 941 }
 942 
 943 asmlinkage int sys_getegid(void)
 944 {
 945         return current->egid;
 946 }
 947 
 948 /*
 949  * This has been replaced by sys_setpriority.  Maybe it should be
 950  * moved into the arch dependent tree for those ports that require
 951  * it for backward compatibility?
 952  */
 953 asmlinkage int sys_nice(int increment)
 954 {
 955         unsigned long newprio;
 956         int increase = 0;
 957 
 958         newprio = increment;
 959         if (increment < 0) {
 960                 if (!suser())
 961                         return -EPERM;
 962                 newprio = -increment;
 963                 increase = 1;
 964         }
 965         if (newprio > 40)
 966                 newprio = 40;
 967         /*
 968          * do a "normalization" of the priority (traditionally
 969          * unix nice values are -20..20, linux doesn't really
 970          * use that kind of thing, but uses the length of the
 971  * timeslice instead (default 200 msec at HZ == 100). The rounding is
 972          * why we want to avoid negative values.
 973          */
 974         newprio = (newprio * DEF_PRIORITY + 10) / 20;
 975         increment = newprio;
 976         if (increase)
 977                 increment = -increment;
 978         newprio = current->priority - increment;
 979         if (newprio < 1)
 980                 newprio = 1;
 981         if (newprio > DEF_PRIORITY*2)
 982                 newprio = DEF_PRIORITY*2;
 983         current->priority = newprio;
 984         return 0;
 985 }
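
/*
 * Worked example (HZ == 100, so DEF_PRIORITY == 20): nice(10)
 * normalizes to (10*20 + 10)/20 == 10 and lowers current->priority
 * from 20 to 10, halving the default time slice; nice(-10), allowed
 * only for the superuser, raises it from 20 to 30.  The result is
 * always clamped to the range 1..DEF_PRIORITY*2.
 */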
 986 
 987 #endif
 988 
 989 static struct task_struct *find_process_by_pid(pid_t pid) {
 990         struct task_struct *p, *q;
 991 
 992         if (pid == 0)
 993                 p = current;
 994         else {
 995                 p = 0;
 996                 for_each_task(q) {
 997                         if (q && q->pid == pid) {
 998                                 p = q;
 999                                 break;
1000                         }
1001                 }
1002         }
1003         return p;
1004 }
1005 
1006 static int setscheduler(pid_t pid, int policy, 
1007                         struct sched_param *param)
1008 {
1009         int error;
1010         struct sched_param lp;
1011         struct task_struct *p;
1012 
1013         if (!param || pid < 0)
1014                 return -EINVAL;
1015 
1016         error = verify_area(VERIFY_READ, param, sizeof(struct sched_param));
1017         if (error)
1018                 return error;
1019         memcpy_fromfs(&lp, param, sizeof(struct sched_param));
1020 
1021         p = find_process_by_pid(pid);
1022         if (!p)
1023                 return -ESRCH;
1024                         
1025         if (policy < 0)
1026                 policy = p->policy;
1027         else if (policy != SCHED_FIFO && policy != SCHED_RR &&
1028                  policy != SCHED_OTHER)
1029                 return -EINVAL;
1030         
1031         /*
1032          * Valid priorities for SCHED_FIFO and SCHED_RR are 1..99, valid
1033          * priority for SCHED_OTHER is 0.
1034          */
1035         if (lp.sched_priority < 0 || lp.sched_priority > 99)
1036                 return -EINVAL;
1037         if ((policy == SCHED_OTHER) != (lp.sched_priority == 0))
1038                 return -EINVAL;
1039 
1040         if ((policy == SCHED_FIFO || policy == SCHED_RR) && !suser())
1041                 return -EPERM;
1042         if ((current->euid != p->euid) && (current->euid != p->uid) &&
1043             !suser())
1044                 return -EPERM;
1045 
1046         p->policy = policy;
1047         p->rt_priority = lp.sched_priority;
1048         if (p->next_run)
1049                 move_last_runqueue(p);
1050         schedule();
1051 
1052         return 0;
1053 }
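
/*
 * Userspace sketch (illustrative): turning the calling process into a
 * round-robin real-time task of priority 50:
 *
 *	struct sched_param sp;
 *	sp.sched_priority = 50;
 *	if (sched_setscheduler(0, SCHED_RR, &sp) < 0)
 *		perror("sched_setscheduler");
 *
 * A pid of 0 means "the calling process" (see find_process_by_pid
 * above); a non-zero priority requires SCHED_FIFO or SCHED_RR, and
 * both of those require superuser rights.
 */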
1054 
1055 asmlinkage int sys_sched_setscheduler(pid_t pid, int policy, 
1056                                       struct sched_param *param)
1057 {
1058         return setscheduler(pid, policy, param);
1059 }
1060 
1061 asmlinkage int sys_sched_setparam(pid_t pid, struct sched_param *param)
1062 {
1063         return setscheduler(pid, -1, param);
1064 }
1065 
1066 asmlinkage int sys_sched_getscheduler(pid_t pid)
1067 {
1068         struct task_struct *p;
1069 
1070         if (pid < 0)
1071                 return -EINVAL;
1072 
1073         p = find_process_by_pid(pid);
1074         if (!p)
1075                 return -ESRCH;
1076                         
1077         return p->policy;
1078 }
1079 
1080 asmlinkage int sys_sched_getparam(pid_t pid, struct sched_param *param)
1081 {
1082         int error;
1083         struct task_struct *p;
1084         struct sched_param lp;
1085 
1086         if (!param || pid < 0)
1087                 return -EINVAL;
1088 
1089         error = verify_area(VERIFY_WRITE, param, sizeof(struct sched_param));
1090         if (error)
1091                 return error;
1092 
1093         p = find_process_by_pid(pid);
1094         if (!p)
1095                 return -ESRCH;
1096 
1097         lp.sched_priority = p->rt_priority;
1098         memcpy_tofs(param, &lp, sizeof(struct sched_param));
1099 
1100         return 0;
1101 }
1102 
1103 asmlinkage int sys_sched_yield(void)
1104 {
1105         move_last_runqueue(current);
1106 
1107         return 0;
1108 }
1109 
1110 asmlinkage int sys_sched_get_priority_max(int policy)
1111 {
1112         switch (policy) {
1113               case SCHED_FIFO:
1114               case SCHED_RR:
1115                 return 99;
1116               case SCHED_OTHER:
1117                 return 0;
1118         }
1119 
1120         return -EINVAL;
1121 }
1122 
1123 asmlinkage int sys_sched_get_priority_min(int policy)
1124 {
1125         switch (policy) {
1126               case SCHED_FIFO:
1127               case SCHED_RR:
1128                 return 1;
1129               case SCHED_OTHER:
1130                 return 0;
1131         }
1132 
1133         return -EINVAL;
1134 }
1135 
1136 asmlinkage int sys_sched_rr_get_interval(pid_t pid, struct timespec *interval)
1137 {
1138         int error;
1139         struct timespec t;
1140 
1141         error = verify_area(VERIFY_WRITE, interval, sizeof(struct timespec));
1142         if (error)
1143                 return error;
1144         
1145         t.tv_sec = 0;
1146         t.tv_nsec = 0;   /* <-- Linus, please fill correct value in here */
1147         return -ENOSYS;  /* and then delete this line. Thanks!           */
1148         memcpy_tofs(interval, &t, sizeof(struct timespec));
1149 
1150         return 0;
1151 }
1152 
1153 /*
 1154  * change timespec to jiffies, trying to avoid the
1155  * most obvious overflows..
1156  */
1157 static unsigned long timespectojiffies(struct timespec *value)
1158 {
1159         unsigned long sec = (unsigned) value->tv_sec;
1160         long nsec = value->tv_nsec;
1161 
1162         if (sec > (LONG_MAX / HZ))
1163                 return LONG_MAX;
1164         nsec += 1000000000L / HZ - 1;
1165         nsec /= 1000000000L / HZ;
1166         return HZ * sec + nsec;
1167 }
1168 
1169 static void jiffiestotimespec(unsigned long jiffies, struct timespec *value)
1170 {
1171         value->tv_nsec = (jiffies % HZ) * (1000000000L / HZ);
1172         value->tv_sec = jiffies / HZ;
1173         return;
1174 }
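
/*
 * Worked example with HZ == 100 (one jiffy == 10 ms): for a request of
 * 1.005 s, tv_nsec == 5000000 is rounded up by 10^9/HZ - 1 and divided
 * by 10^9/HZ, contributing one jiffy, so the result is 100 + 1 == 101
 * jiffies.  Converting 101 jiffies back yields 1 s and 10000000 ns:
 * the conversion always errs on the long side and never undershoots
 * the requested interval.
 */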
1175 
1176 asmlinkage int sys_nanosleep(struct timespec *rqtp, struct timespec *rmtp)
1177 {
1178         int error;
1179         struct timespec t;
1180         unsigned long expire;
1181 
1182         error = verify_area(VERIFY_READ, rqtp, sizeof(struct timespec));
1183         if (error)
1184                 return error;
1185         memcpy_fromfs(&t, rqtp, sizeof(struct timespec));
1186         if (rmtp) {
1187                 error = verify_area(VERIFY_WRITE, rmtp,
1188                                     sizeof(struct timespec));
1189                 if (error)
1190                         return error;
1191         }
1192 
1193         if (t.tv_nsec >= 1000000000L || t.tv_nsec < 0 || t.tv_sec < 0)
1194                 return -EINVAL;
1195 
1196         if (t.tv_sec == 0 && t.tv_nsec <= 2000000L &&
1197             current->policy != SCHED_OTHER) {
1198                 /*
1199                  * Short delay requests up to 2 ms will be handled with
1200                  * high precision by a busy wait for all real-time processes.
1201                  */
1202                 udelay((t.tv_nsec + 999) / 1000);
1203                 return 0;
1204         }
1205 
1206         expire = timespectojiffies(&t) + (t.tv_sec || t.tv_nsec) + jiffies;
1207         current->timeout = expire;
1208         current->state = TASK_INTERRUPTIBLE;
1209         schedule();
1210 
1211         if (expire > jiffies) {
1212                 if (rmtp) {
1213                         jiffiestotimespec(expire - jiffies -
1214                                           (expire > jiffies + 1), &t);
1215                         memcpy_tofs(rmtp, &t, sizeof(struct timespec));
1216                 }
1217                 return -EINTR;
1218         }
1219 
1220         return 0;
1221 }
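
/*
 * Note the extra "(t.tv_sec || t.tv_nsec)" jiffy added into the
 * timeout above: every non-zero request is padded by one tick, since
 * the current tick is already partly over when the timeout is computed
 * and the sleep must never end early.
 */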
1222 
1223 static void show_task(int nr,struct task_struct * p)
1224 {
1225         unsigned long free;
1226         static const char * stat_nam[] = { "R", "S", "D", "Z", "T", "W" };
1227 
1228         printk("%-8s %3d ", p->comm, (p == current) ? -nr : nr);
1229         if (((unsigned) p->state) < sizeof(stat_nam)/sizeof(char *))
1230                 printk(stat_nam[p->state]);
1231         else
1232                 printk(" ");
1233 #if ((~0UL) == 0xffffffff)
1234         if (p == current)
1235                 printk(" current  ");
1236         else
1237                 printk(" %08lX ", thread_saved_pc(&p->tss));
1238 #else
1239         if (p == current)
1240                 printk("   current task   ");
1241         else
1242                 printk(" %016lx ", thread_saved_pc(&p->tss));
1243 #endif
1244         for (free = 1; free < PAGE_SIZE/sizeof(long) ; free++) {
1245                 if (((unsigned long *)p->kernel_stack_page)[free])
1246                         break;
1247         }
1248         printk("%5lu %5d %6d ", free*sizeof(long), p->pid, p->p_pptr->pid);
1249         if (p->p_cptr)
1250                 printk("%5d ", p->p_cptr->pid);
1251         else
1252                 printk("      ");
1253         if (p->p_ysptr)
1254                 printk("%7d", p->p_ysptr->pid);
1255         else
1256                 printk("       ");
1257         if (p->p_osptr)
1258                 printk(" %5d\n", p->p_osptr->pid);
1259         else
1260                 printk("\n");
1261 }
1262 
1263 void show_state(void)
1264 {
1265         int i;
1266 
1267 #if ((~0UL) == 0xffffffff)
1268         printk("\n"
1269                "                         free                        sibling\n");
1270         printk("  task             PC    stack   pid father child younger older\n");
1271 #else
1272         printk("\n"
1273                "                                 free                        sibling\n");
1274         printk("  task                 PC        stack   pid father child younger older\n");
1275 #endif
1276         for (i=0 ; i<NR_TASKS ; i++)
1277                 if (task[i])
1278                         show_task(i,task[i]);
1279 }
1280 
1281 void sched_init(void)
1282 {
1283         /*
1284          *      We have to do a little magic to get the first
1285          *      process right in SMP mode.
1286          */
1287         int cpu=smp_processor_id();
1288         current_set[cpu]=&init_task;
1289 #ifdef __SMP__  
1290         init_task.processor=cpu;
1291 #endif
1292         init_bh(TIMER_BH, timer_bh);
1293         init_bh(TQUEUE_BH, tqueue_bh);
1294         init_bh(IMMEDIATE_BH, immediate_bh);
1295 }
