root/kernel/sched.c


DEFINITIONS

This source file includes the following definitions:
  1. add_to_runqueue
  2. del_from_runqueue
  3. move_last_runqueue
  4. wake_up_process
  5. process_timeout
  6. goodness
  7. schedule
  8. sys_pause
  9. wake_up
  10. wake_up_interruptible
  11. __down
  12. __sleep_on
  13. interruptible_sleep_on
  14. sleep_on
  15. add_timer
  16. del_timer
  17. count_active_tasks
  18. calc_load
  19. second_overflow
  20. timer_bh
  21. tqueue_bh
  22. immediate_bh
  23. do_timer
  24. sys_alarm
  25. sys_getpid
  26. sys_getppid
  27. sys_getuid
  28. sys_geteuid
  29. sys_getgid
  30. sys_getegid
  31. sys_nice
  32. find_process_by_pid
  33. setscheduler
  34. sys_sched_setscheduler
  35. sys_sched_setparam
  36. sys_sched_getscheduler
  37. sys_sched_getparam
  38. sys_sched_yield
  39. sys_sched_get_priority_max
  40. sys_sched_get_priority_min
  41. sys_sched_rr_get_interval
  42. show_task
  43. show_state
  44. sched_init

   1 /*
   2  *  linux/kernel/sched.c
   3  *
   4  *  Copyright (C) 1991, 1992  Linus Torvalds
   5  */
   6 
   7 /*
   8  * 'sched.c' is the main kernel file. It contains scheduling primitives
   9  * (sleep_on, wakeup, schedule etc) as well as a number of simple system
  10  * call functions (like getpid(), which just extracts a field from
  11  * the current task).
  12  */
  13 
  14 #include <linux/signal.h>
  15 #include <linux/sched.h>
  16 #include <linux/timer.h>
  17 #include <linux/kernel.h>
  18 #include <linux/kernel_stat.h>
  19 #include <linux/fdreg.h>
  20 #include <linux/errno.h>
  21 #include <linux/time.h>
  22 #include <linux/ptrace.h>
  23 #include <linux/delay.h>
  24 #include <linux/interrupt.h>
  25 #include <linux/tqueue.h>
  26 #include <linux/resource.h>
  27 #include <linux/mm.h>
  28 #include <linux/smp.h>
  29 
  30 #include <asm/system.h>
  31 #include <asm/io.h>
  32 #include <asm/segment.h>
  33 #include <asm/pgtable.h>
  34 #include <asm/mmu_context.h>
  35 
  36 #include <linux/timex.h>
  37 
  38 /*
  39  * kernel variables
  40  */
  41 
  42 int securelevel = 0;                    /* system security level */
  43 
  44 long tick = 1000000 / HZ;               /* timer interrupt period */
  45 volatile struct timeval xtime;          /* The current time */
  46 int tickadj = 500/HZ;                   /* microsecs */
  47 
  48 DECLARE_TASK_QUEUE(tq_timer);
  49 DECLARE_TASK_QUEUE(tq_immediate);
  50 DECLARE_TASK_QUEUE(tq_scheduler);
  51 
  52 /*
  53  * phase-lock loop variables
  54  */
  55 int time_state = TIME_BAD;     /* clock synchronization status */
  56 int time_status = STA_UNSYNC | STA_PLL; /* clock status bits */
  57 long time_offset = 0;           /* time adjustment (us) */
  58 long time_constant = 2;         /* pll time constant */
  59 long time_tolerance = MAXFREQ;  /* frequency tolerance (ppm) */
  60 long time_precision = 1;        /* clock precision (us) */
  61 long time_maxerror = 0x70000000;/* maximum error */
  62 long time_esterror = 0x70000000;/* estimated error */
  63 long time_phase = 0;            /* phase offset (scaled us) */
  64 long time_freq = 0;             /* frequency offset (scaled ppm) */
  65 long time_adj = 0;              /* tick adjust (scaled 1 / HZ) */
  66 long time_reftime = 0;          /* time at last adjustment (s) */
  67 
  68 long time_adjust = 0;
  69 long time_adjust_step = 0;
  70 
  71 int need_resched = 0;
  72 unsigned long event = 0;
  73 
  74 extern int _setitimer(int, struct itimerval *, struct itimerval *);
  75 unsigned int * prof_buffer = NULL;
  76 unsigned long prof_len = 0;
  77 unsigned long prof_shift = 0;
  78 
  79 #define _S(nr) (1<<((nr)-1))
  80 
  81 extern void mem_use(void);
  82 
  83 static unsigned long init_kernel_stack[1024] = { STACK_MAGIC, };
  84 unsigned long init_user_stack[1024] = { STACK_MAGIC, };
  85 static struct vm_area_struct init_mmap = INIT_MMAP;
  86 static struct fs_struct init_fs = INIT_FS;
  87 static struct files_struct init_files = INIT_FILES;
  88 static struct signal_struct init_signals = INIT_SIGNALS;
  89 
  90 struct mm_struct init_mm = INIT_MM;
  91 struct task_struct init_task = INIT_TASK;
  92 
  93 unsigned long volatile jiffies=0;
  94 
  95 struct task_struct *current_set[NR_CPUS];
  96 struct task_struct *last_task_used_math = NULL;
  97 
  98 struct task_struct * task[NR_TASKS] = {&init_task, };
  99 
 100 struct kernel_stat kstat = { 0 };
 101 
 102 static inline void add_to_runqueue(struct task_struct * p)
 103 {
 104 #if 1   /* sanity tests */
 105         if (p->next_run || p->prev_run) {
 106                 printk("task already on run-queue\n");
 107                 return;
 108         }
 109 #endif
 110         if (p->counter > current->counter + 3)
 111                 need_resched = 1;
 112         nr_running++;
 113         (p->prev_run = init_task.prev_run)->next_run = p;
 114         p->next_run = &init_task;
 115         init_task.prev_run = p;
 116 }
 117 
 118 static inline void del_from_runqueue(struct task_struct * p)
 119 {
 120         struct task_struct *next = p->next_run;
 121         struct task_struct *prev = p->prev_run;
 122 
 123 #if 1   /* sanity tests */
 124         if (!next || !prev) {
 125                 printk("task not on run-queue\n");
 126                 return;
 127         }
 128 #endif
 129         if (p == &init_task) {
 130                 static int nr = 0;
 131                 if (nr < 5) {
 132                         nr++;
 133                         printk("idle task may not sleep\n");
 134                 }
 135                 return;
 136         }
 137         nr_running--;
 138         next->prev_run = prev;
 139         prev->next_run = next;
 140         p->next_run = NULL;
 141         p->prev_run = NULL;
 142 }
 143 
 144 static inline void move_last_runqueue(struct task_struct * p)
 145 {
 146         struct task_struct *next = p->next_run;
 147         struct task_struct *prev = p->prev_run;
 148 
 149         next->prev_run = prev;
 150         prev->next_run = next;
 151         (p->prev_run = init_task.prev_run)->next_run = p;
 152         p->next_run = &init_task;
 153         init_task.prev_run = p;
 154 }
 155 
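/*
 * A note on the run-queue shape (a sketch, not part of the original
 * comments): the helpers above maintain a circular doubly-linked list
 * threaded through task_struct, with init_task doubling as the list
 * head.  An empty queue is init_task.next_run == init_task.prev_run ==
 * &init_task, and both add_to_runqueue() and move_last_runqueue()
 * splice a task in just before init_task, i.e. at the tail.  A full
 * scan therefore mirrors the selection loop in schedule() below:
 *
 *      struct task_struct *p;
 *
 *      for (p = init_task.next_run; p != &init_task; p = p->next_run)
 *              examine(p);     -- examine() is a hypothetical visitor
 */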
 156 /*
 157  * Wake up a process. Put it on the run-queue if it's not
 158  * already there.  The "current" process is always on the
 159  * run-queue (except when the actual re-schedule is in
 160  * progress), and as such you're allowed to do the simpler
 161  * "current->state = TASK_RUNNING" to mark yourself runnable
 162  * without the overhead of this.
 163  */
 164 inline void wake_up_process(struct task_struct * p)
 165 {
 166         unsigned long flags;
 167 
 168         save_flags(flags);
 169         cli();
 170         p->state = TASK_RUNNING;
 171         if (!p->next_run)
 172                 add_to_runqueue(p);
 173         restore_flags(flags);
 174 }
 175 
 176 static void process_timeout(unsigned long __data)
 177 {
 178         struct task_struct * p = (struct task_struct *) __data;
 179 
 180         p->timeout = 0;
 181         wake_up_process(p);
 182 }
 183 
 184 /*
  185  * This is the function that decides how desirable a process is..
 186  * You can weigh different processes against each other depending
 187  * on what CPU they've run on lately etc to try to handle cache
 188  * and TLB miss penalties.
 189  *
 190  * Return values:
 191  *       -1000: never select this
 192  *           0: out of time, recalculate counters (but it might still be
 193  *              selected)
 194  *         +ve: "goodness" value (the larger, the better)
 195  *       +1000: realtime process, select this.
 196  */
 197 static inline int goodness(struct task_struct * p, int this_cpu)
 198 {
 199         int weight;
 200 
 201 #ifdef __SMP__  
 202         /* We are not permitted to run a task someone else is running */
 203         if (p->processor != NO_PROC_ID)
 204                 return -1000;
 205 #endif
 206 
 207         /*
 208          * Realtime process, select the first one on the
 209          * runqueue (taking priorities within processes
 210          * into account).
 211          */
 212         if (p->policy != SCHED_OTHER)
 213                 return 1000 + p->rt_priority;
 214 
 215         /*
 216          * Give the process a first-approximation goodness value
 217          * according to the number of clock-ticks it has left.
 218          *
 219          * Don't do any other calculations if the time slice is
 220          * over..
 221          */
 222         weight = p->counter;
 223         if (weight) {
 224                         
 225 #ifdef __SMP__
 226                 /* Give a largish advantage to the same processor...   */
 227                 /* (this is equivalent to penalizing other processors) */
 228                 if (p->last_processor == this_cpu)
 229                         weight += PROC_CHANGE_PENALTY;
 230 #endif
 231 
 232                 /* .. and a slight advantage to the current process */
 233                 if (p == current)
 234                         weight += 1;
 235         }
 236 
 237         return weight;
 238 }
 239 
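/*
 * Worked example (a sketch, assuming HZ==100, so DEF_PRIORITY works
 * out to 20 ticks): a SCHED_FIFO task with rt_priority 50 scores
 * 1050 and always wins; a fresh SCHED_OTHER task with counter == 20
 * scores 20 (21 if it happens to be "current"); a task whose slice
 * is used up scores 0 and only wins once every runnable task is
 * exhausted, which is exactly what triggers the counter
 * recalculation in schedule().
 */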
 240 /*
 241  *  'schedule()' is the scheduler function. It's a very simple and nice
 242  * scheduler: it's not perfect, but certainly works for most things.
 243  *
 244  * The goto is "interesting".
 245  *
 246  *   NOTE!!  Task 0 is the 'idle' task, which gets called when no other
  247  * tasks can run. It cannot be killed, and it cannot sleep. The 'state'
 248  * information in task[0] is never used.
 249  */
 250 asmlinkage void schedule(void)
 251 {
 252         int c;
 253         struct task_struct * p;
 254         struct task_struct * next;
 255         unsigned long timeout = 0;
 256         int this_cpu=smp_processor_id();
 257 
 258 /* check alarm, wake up any interruptible tasks that have got a signal */
 259 
 260         if (intr_count) {
 261                 printk("Aiee: scheduling in interrupt\n");
 262                 return;
 263         }
 264         if (bh_active & bh_mask) {
 265                 intr_count = 1;
 266                 do_bottom_half();
 267                 intr_count = 0;
 268         }
 269         run_task_queue(&tq_scheduler);
 270 
 271         need_resched = 0;
 272         cli();
 273         /* move an exhausted RR process to be last.. */
 274         if (!current->counter && current->policy == SCHED_RR) {
 275                 current->counter = current->priority;
 276                 move_last_runqueue(current);
 277         }
 278         switch (current->state) {
 279                 case TASK_INTERRUPTIBLE:
 280                         if (current->signal & ~current->blocked)
 281                                 goto makerunnable;
 282                         timeout = current->timeout;
 283                         if (timeout && (timeout <= jiffies)) {
 284                                 current->timeout = 0;
 285                                 timeout = 0;
 286                 makerunnable:
 287                                 current->state = TASK_RUNNING;
 288                                 break;
 289                         }
 290                 default:
 291                         del_from_runqueue(current);
 292                 case TASK_RUNNING:
 293         }
 294         p = init_task.next_run;
 295         sti();
 296         
 297 #ifdef __SMP__
 298         /*
 299          *      This is safe as we do not permit re-entry of schedule()
 300          */
 301         current->processor = NO_PROC_ID;        
 302 #endif  
 303 
 304 /*
 305  * Note! there may appear new tasks on the run-queue during this, as
 306  * interrupts are enabled. However, they will be put on front of the
 307  * list, so our list starting at "p" is essentially fixed.
 308  */
 309 /* this is the scheduler proper: */
 310         c = -1000;
 311         next = &init_task;
 312         while (p != &init_task) {
 313                 int weight = goodness(p, this_cpu);
 314                 if (weight > c)
 315                         c = weight, next = p;
 316                 p = p->next_run;
 317         }
 318 
 319         /* if all runnable processes have "counter == 0", re-calculate counters */
 320         if (!c) {
 321                 for_each_task(p)
 322                         p->counter = (p->counter >> 1) + p->priority;
 323         }
 324 #ifdef __SMP__  
 325         
 326         /*
 327          *      Context switching between two idle threads is pointless.
 328          */
 329         if(!current->pid && !next->pid)
 330                 next=current;
 331         /*
 332          *      Allocate process to CPU
 333          */
 334          
 335          next->processor = this_cpu;
 336          next->last_processor = this_cpu;
 337          
 338 #endif   
 339 #ifdef __SMP_PROF__ 
 340         /* mark processor running an idle thread */
 341         if (0==next->pid)
 342                 set_bit(this_cpu,&smp_idle_map);
 343         else
 344                 clear_bit(this_cpu,&smp_idle_map);
 345 #endif
 346         if (current != next) {
 347                 struct timer_list timer;
 348 
 349                 kstat.context_swtch++;
 350                 if (timeout) {
 351                         init_timer(&timer);
 352                         timer.expires = timeout;
 353                         timer.data = (unsigned long) current;
 354                         timer.function = process_timeout;
 355                         add_timer(&timer);
 356                 }
 357                 get_mmu_context(next);
 358                 switch_to(next);
 359                 if (timeout)
 360                         del_timer(&timer);
 361         }
 362 }
 363 
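/*
 * Worked example of the recalculation above (a sketch): for a task
 * that stays asleep across successive recalculations,
 *
 *      counter' = counter/2 + priority
 *
 * converges geometrically toward 2*priority.  With priority == 20 a
 * sleeper's counter runs 20, 30, 35, 37, 38, 39, ... so a blocked
 * task can bank at most one extra timeslice of credit -- this is the
 * scheduler's interactivity boost, and it drains away tick by tick
 * once the task starts running again.
 */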
 364 #ifndef __alpha__
 365 
 366 /*
 367  * For backwards compatibility?  This can be done in libc so Alpha
 368  * and all newer ports shouldn't need it.
 369  */
 370 asmlinkage int sys_pause(void)
 371 {
 372         current->state = TASK_INTERRUPTIBLE;
 373         schedule();
 374         return -ERESTARTNOHAND;
 375 }
 376 
 377 #endif
 378 
 379 /*
 380  * wake_up doesn't wake up stopped processes - they have to be awakened
 381  * with signals or similar.
 382  *
 383  * Note that this doesn't need cli-sti pairs: interrupts may not change
 384  * the wait-queue structures directly, but only call wake_up() to wake
  385  * a process. The woken process must remove itself from the wait-queue.
 386  */
 387 void wake_up(struct wait_queue **q)
 388 {
 389         struct wait_queue *tmp;
 390         struct task_struct * p;
 391 
 392         if (!q || !(tmp = *q))
 393                 return;
 394         do {
 395                 if ((p = tmp->task) != NULL) {
 396                         if ((p->state == TASK_UNINTERRUPTIBLE) ||
 397                             (p->state == TASK_INTERRUPTIBLE))
 398                                 wake_up_process(p);
 399                 }
 400                 if (!tmp->next) {
 401                         printk("wait_queue is bad (eip = %p)\n",
 402                                 __builtin_return_address(0));
 403                         printk("        q = %p\n",q);
 404                         printk("       *q = %p\n",*q);
 405                         printk("      tmp = %p\n",tmp);
 406                         break;
 407                 }
 408                 tmp = tmp->next;
 409         } while (tmp != *q);
 410 }
 411 
 412 void wake_up_interruptible(struct wait_queue **q)
 413 {
 414         struct wait_queue *tmp;
 415         struct task_struct * p;
 416 
 417         if (!q || !(tmp = *q))
 418                 return;
 419         do {
 420                 if ((p = tmp->task) != NULL) {
 421                         if (p->state == TASK_INTERRUPTIBLE)
 422                                 wake_up_process(p);
 423                 }
 424                 if (!tmp->next) {
 425                         printk("wait_queue is bad (eip = %p)\n",
 426                                 __builtin_return_address(0));
 427                         printk("        q = %p\n",q);
 428                         printk("       *q = %p\n",*q);
 429                         printk("      tmp = %p\n",tmp);
 430                         break;
 431                 }
 432                 tmp = tmp->next;
 433         } while (tmp != *q);
 434 }
 435 
 436 void __down(struct semaphore * sem)
 437 {
 438         struct wait_queue wait = { current, NULL };
 439         add_wait_queue(&sem->wait, &wait);
 440         current->state = TASK_UNINTERRUPTIBLE;
 441         while (sem->count <= 0) {
 442                 schedule();
 443                 current->state = TASK_UNINTERRUPTIBLE;
 444         }
 445         current->state = TASK_RUNNING;
 446         remove_wait_queue(&sem->wait, &wait);
 447 }
 448 
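/*
 * Typical usage (a sketch, not from this file): __down() is only the
 * contended slow path.  Callers use the down()/up() pair from
 * <asm/semaphore.h>, which drops into __down() when the count shows
 * the semaphore is already held:
 *
 *      static struct semaphore my_sem = MUTEX;   -- assumes the MUTEX
 *                                       initializer of this era
 *      down(&my_sem);
 *      ... critical section, may sleep ...
 *      up(&my_sem);
 */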
 449 static inline void __sleep_on(struct wait_queue **p, int state)
 450 {
 451         unsigned long flags;
 452         struct wait_queue wait = { current, NULL };
 453 
 454         if (!p)
 455                 return;
 456         if (current == task[0])
 457                 panic("task[0] trying to sleep");
 458         current->state = state;
 459         add_wait_queue(p, &wait);
 460         save_flags(flags);
 461         sti();
 462         schedule();
 463         remove_wait_queue(p, &wait);
 464         restore_flags(flags);
 465 }
 466 
 467 void interruptible_sleep_on(struct wait_queue **p)
 468 {
 469         __sleep_on(p,TASK_INTERRUPTIBLE);
 470 }
 471 
 472 void sleep_on(struct wait_queue **p)
 473 {
 474         __sleep_on(p,TASK_UNINTERRUPTIBLE);
 475 }
 476 
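/*
 * Typical driver usage of the sleep/wake primitives (a sketch; "dev"
 * and its fields are hypothetical): a reader sleeps until an
 * interrupt handler produces data and wakes the queue.
 *
 *      static struct wait_queue *read_wait = NULL;
 *
 *      while (!dev->data_ready)
 *              interruptible_sleep_on(&read_wait);
 *
 * and, from the interrupt handler:
 *
 *      dev->data_ready = 1;
 *      wake_up_interruptible(&read_wait);
 *
 * The condition must be re-tested in a loop: wake_up() wakes every
 * sleeper on the queue, and an interruptible sleep also ends early
 * when a signal arrives.
 */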
 477 /*
  478  * The head for the timer-list has an "expires" field of ~0 (maximal),
 479  * and the sorting routine counts on this..
 480  */
 481 static struct timer_list timer_head = { &timer_head, &timer_head, ~0, 0, NULL };
 482 #define SLOW_BUT_DEBUGGING_TIMERS 0
 483 
 484 void add_timer(struct timer_list * timer)
 485 {
 486         unsigned long flags;
 487         struct timer_list *p;
 488 
 489 #if SLOW_BUT_DEBUGGING_TIMERS
 490         if (timer->next || timer->prev) {
 491                 printk("add_timer() called with non-zero list from %p\n",
 492                         __builtin_return_address(0));
 493                 return;
 494         }
 495 #endif
 496         p = &timer_head;
 497         save_flags(flags);
 498         cli();
 499         do {
 500                 p = p->next;
 501         } while (timer->expires > p->expires);
 502         timer->next = p;
 503         timer->prev = p->prev;
 504         p->prev = timer;
 505         timer->prev->next = timer;
 506         restore_flags(flags);
 507 }
 508 
 509 int del_timer(struct timer_list * timer)
 510 {
 511         unsigned long flags;
 512 #if SLOW_BUT_DEBUGGING_TIMERS
 513         struct timer_list * p;
 514 
 515         p = &timer_head;
 516         save_flags(flags);
 517         cli();
 518         while ((p = p->next) != &timer_head) {
 519                 if (p == timer) {
 520                         timer->next->prev = timer->prev;
 521                         timer->prev->next = timer->next;
 522                         timer->next = timer->prev = NULL;
 523                         restore_flags(flags);
 524                         return 1;
 525                 }
 526         }
 527         if (timer->next || timer->prev)
 528                 printk("del_timer() called from %p with timer not initialized\n",
 529                         __builtin_return_address(0));
 530         restore_flags(flags);
 531         return 0;
 532 #else
 533         struct timer_list * next;
 534         int ret = 0;
 535         save_flags(flags);
 536         cli();
 537         if ((next = timer->next) != NULL) {
 538                 (next->prev = timer->prev)->next = next;
 539                 timer->next = timer->prev = NULL;
 540                 ret = 1;
 541         }
 542         restore_flags(flags);
 543         return ret;
 544 #endif
 545 }
 546 
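/*
 * Typical usage of the dynamic timers (a sketch; my_timeout() and
 * "dev" are hypothetical), following the same pattern schedule()
 * uses for its timeout above:
 *
 *      static struct timer_list my_timer;
 *
 *      init_timer(&my_timer);
 *      my_timer.expires = jiffies + HZ;      -- absolute, 1s from now
 *      my_timer.data = (unsigned long) dev;
 *      my_timer.function = my_timeout;
 *      add_timer(&my_timer);
 *
 * Note that expires is an absolute jiffies value here, not a delta.
 * del_timer() returns 1 if the timer was still pending and 0 if it
 * had already run (or was never added), which tells the caller who
 * is responsible for any cleanup.
 */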
 547 unsigned long timer_active = 0;
 548 struct timer_struct timer_table[32];
 549 
 550 /*
  551  * Hmm.. Changed this, as the GNU make sources (load.c) seem to
 552  * imply that avenrun[] is the standard name for this kind of thing.
 553  * Nothing else seems to be standardized: the fractional size etc
 554  * all seem to differ on different machines.
 555  */
 556 unsigned long avenrun[3] = { 0,0,0 };
 557 
 558 /*
 559  * Nr of active tasks - counted in fixed-point numbers
 560  */
 561 static unsigned long count_active_tasks(void)
 562 {
 563         struct task_struct **p;
 564         unsigned long nr = 0;
 565 
 566         for(p = &LAST_TASK; p > &FIRST_TASK; --p)
 567                 if (*p && ((*p)->state == TASK_RUNNING ||
 568                            (*p)->state == TASK_UNINTERRUPTIBLE ||
 569                            (*p)->state == TASK_SWAPPING))
 570                         nr += FIXED_1;
 571 #ifdef __SMP__
 572         nr-=(smp_num_cpus-1)*FIXED_1;
 573 #endif                  
 574         return nr;
 575 }
 576 
 577 static inline void calc_load(void)
 578 {
 579         unsigned long active_tasks; /* fixed-point */
 580         static int count = LOAD_FREQ;
 581 
 582         if (count-- > 0)
 583                 return;
 584         count = LOAD_FREQ;
 585         active_tasks = count_active_tasks();
 586         CALC_LOAD(avenrun[0], EXP_1, active_tasks);
 587         CALC_LOAD(avenrun[1], EXP_5, active_tasks);
 588         CALC_LOAD(avenrun[2], EXP_15, active_tasks);
 589 }
 590 
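/*
 * CALC_LOAD is a fixed-point exponential decay (a sketch, assuming
 * the <linux/sched.h> constants of this era: FSHIFT == 11, FIXED_1 ==
 * 1<<11 == 2048, EXP_1 == 1884, i.e. roughly 2048/e^(5/60), sampled
 * every LOAD_FREQ == 5*HZ ticks):
 *
 *      load = (load*EXP_1 + active*(FIXED_1 - EXP_1)) >> FSHIFT;
 *
 * Worked example: starting from avenrun[0] == 0 with one task
 * permanently runnable (active == 2048), successive 5-second updates
 * give 164, 314, 452, ... in fixed point -- a displayed 1-minute
 * load of 0.08, 0.15, 0.22, ... creeping toward 1.00.
 */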
 591 /*
 592  * this routine handles the overflow of the microsecond field
 593  *
 594  * The tricky bits of code to handle the accurate clock support
 595  * were provided by Dave Mills (Mills@UDEL.EDU) of NTP fame.
 596  * They were originally developed for SUN and DEC kernels.
 597  * All the kudos should go to Dave for this stuff.
 598  *
 599  */
 600 static void second_overflow(void)
 601 {
 602     long ltemp;
 603 
 604     /* Bump the maxerror field */
 605     time_maxerror = (0x70000000-time_maxerror <
 606                      time_tolerance >> SHIFT_USEC) ?
 607         0x70000000 : (time_maxerror + (time_tolerance >> SHIFT_USEC));
 608 
 609     /*
 610      * Leap second processing. If in leap-insert state at
 611      * the end of the day, the system clock is set back one
 612      * second; if in leap-delete state, the system clock is
 613      * set ahead one second. The microtime() routine or
  614  * external clock driver will ensure that reported time
 615      * is always monotonic. The ugly divides should be
 616      * replaced.
 617      */
 618     switch (time_state) {
 619 
 620     case TIME_OK:
 621         if (time_status & STA_INS)
 622             time_state = TIME_INS;
 623         else if (time_status & STA_DEL)
 624             time_state = TIME_DEL;
 625         break;
 626 
 627     case TIME_INS:
 628         if (xtime.tv_sec % 86400 == 0) {
 629             xtime.tv_sec--;
 630             time_state = TIME_OOP;
 631             printk("Clock: inserting leap second 23:59:60 UTC\n");
 632         }
 633         break;
 634 
 635     case TIME_DEL:
 636         if ((xtime.tv_sec + 1) % 86400 == 0) {
 637             xtime.tv_sec++;
 638             time_state = TIME_WAIT;
 639             printk("Clock: deleting leap second 23:59:59 UTC\n");
 640         }
 641         break;
 642 
 643     case TIME_OOP:
 644 
 645         time_state = TIME_WAIT;
 646         break;
 647 
 648     case TIME_WAIT:
 649         if (!(time_status & (STA_INS | STA_DEL)))
 650             time_state = TIME_OK;
 651     }
 652 
 653     /*
 654      * Compute the phase adjustment for the next second. In
 655      * PLL mode, the offset is reduced by a fixed factor
 656      * times the time constant. In FLL mode the offset is
 657      * used directly. In either mode, the maximum phase
 658      * adjustment for each second is clamped so as to spread
 659      * the adjustment over not more than the number of
 660      * seconds between updates.
 661      */
 662     if (time_offset < 0) {
 663         ltemp = -time_offset;
 664         if (!(time_status & STA_FLL))
 665             ltemp >>= SHIFT_KG + time_constant;
 666         if (ltemp > (MAXPHASE / MINSEC) << SHIFT_UPDATE)
 667             ltemp = (MAXPHASE / MINSEC) <<
 668                 SHIFT_UPDATE;
 669         time_offset += ltemp;
 670         time_adj = -ltemp << (SHIFT_SCALE - SHIFT_HZ -
 671                               SHIFT_UPDATE);
 672     } else {
 673         ltemp = time_offset;
 674         if (!(time_status & STA_FLL))
 675             ltemp >>= SHIFT_KG + time_constant;
 676         if (ltemp > (MAXPHASE / MINSEC) << SHIFT_UPDATE)
 677             ltemp = (MAXPHASE / MINSEC) <<
 678                 SHIFT_UPDATE;
 679         time_offset -= ltemp;
 680         time_adj = ltemp << (SHIFT_SCALE - SHIFT_HZ -
 681                              SHIFT_UPDATE);
 682     }
 683 
 684     /*
 685      * Compute the frequency estimate and additional phase
 686      * adjustment due to frequency error for the next
 687      * second. When the PPS signal is engaged, gnaw on the
 688      * watchdog counter and update the frequency computed by
 689      * the pll and the PPS signal.
 690      */
 691     pps_valid++;
 692     if (pps_valid == PPS_VALID) {
 693         pps_jitter = MAXTIME;
 694         pps_stabil = MAXFREQ;
 695         time_status &= ~(STA_PPSSIGNAL | STA_PPSJITTER |
 696                          STA_PPSWANDER | STA_PPSERROR);
 697     }
 698     ltemp = time_freq + pps_freq;
 699     if (ltemp < 0)
 700         time_adj -= -ltemp >>
 701             (SHIFT_USEC + SHIFT_HZ - SHIFT_SCALE);
 702     else
 703         time_adj += ltemp >>
 704             (SHIFT_USEC + SHIFT_HZ - SHIFT_SCALE);
 705 
 706 #if HZ == 100
 707     /* compensate for (HZ==100) != 128. Add 25% to get 125; => only 3% error */
 708     if (time_adj < 0)
 709         time_adj -= -time_adj >> 2;
 710     else
 711         time_adj += time_adj >> 2;
 712 #endif
 713 }
 714 
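/*
 * Worked example of the leap-second machinery above (a sketch): with
 * STA_INS set by adjtimex(), time_state moves TIME_OK -> TIME_INS,
 * and on the first overflow where xtime.tv_sec is a multiple of
 * 86400 (midnight UTC) the clock is stepped back one second, so the
 * day's last second repeats and a civil clock shows 23:59:60.
 * TIME_OOP then drains into TIME_WAIT, which holds until the
 * STA_INS/STA_DEL bits are cleared by a later adjtimex() update.
 */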
 715 /*
 716  * disregard lost ticks for now.. We don't care enough.
 717  */
 718 static void timer_bh(void * unused)
 719 {
 720         unsigned long mask;
 721         struct timer_struct *tp;
 722         struct timer_list * timer;
 723 
 724         cli();
 725         while ((timer = timer_head.next) != &timer_head && timer->expires <= jiffies) {
 726                 void (*fn)(unsigned long) = timer->function;
 727                 unsigned long data = timer->data;
 728                 timer->next->prev = timer->prev;
 729                 timer->prev->next = timer->next;
 730                 timer->next = timer->prev = NULL;
 731                 sti();
 732                 fn(data);
 733                 cli();
 734         }
 735         sti();
 736         
 737         for (mask = 1, tp = timer_table+0 ; mask ; tp++,mask += mask) {
 738                 if (mask > timer_active)
 739                         break;
 740                 if (!(mask & timer_active))
 741                         continue;
 742                 if (tp->expires > jiffies)
 743                         continue;
 744                 timer_active &= ~mask;
 745                 tp->fn();
 746                 sti();
 747         }
 748 }
 749 
 750 void tqueue_bh(void * unused)
 751 {
 752         run_task_queue(&tq_timer);
 753 }
 754 
 755 void immediate_bh(void * unused)
 756 {
 757         run_task_queue(&tq_immediate);
 758 }
 759 
 760 void do_timer(struct pt_regs * regs)
 761 {
 762         unsigned long mask;
 763         struct timer_struct *tp;
 764         long ltemp, psecs;
 765 #ifdef  __SMP_PROF__
 766         int cpu,i;
 767 #endif
 768 
  769         /* Advance the phase; once it accumulates to a whole
  770          * microsecond, fold it into this tick's usec update.
  771          */
 772         time_phase += time_adj;
 773         if (time_phase <= -FINEUSEC) {
 774                 ltemp = -time_phase >> SHIFT_SCALE;
 775                 time_phase += ltemp << SHIFT_SCALE;
 776                 xtime.tv_usec += tick + time_adjust_step - ltemp;
 777         }
 778         else if (time_phase >= FINEUSEC) {
 779                 ltemp = time_phase >> SHIFT_SCALE;
 780                 time_phase -= ltemp << SHIFT_SCALE;
 781                 xtime.tv_usec += tick + time_adjust_step + ltemp;
 782         } else
 783                 xtime.tv_usec += tick + time_adjust_step;
 784 
 785         if (time_adjust) {
 786             /* We are doing an adjtime thing. 
 787              *
 788              * Modify the value of the tick for next time.
 789              * Note that a positive delta means we want the clock
 790              * to run fast. This means that the tick should be bigger
 791              *
 792              * Limit the amount of the step for *next* tick to be
 793              * in the range -tickadj .. +tickadj
 794              */
 795              if (time_adjust > tickadj)
 796                time_adjust_step = tickadj;
 797              else if (time_adjust < -tickadj)
 798                time_adjust_step = -tickadj;
 799              else
 800                time_adjust_step = time_adjust;
 801              
 802             /* Reduce by this step the amount of time left  */
 803             time_adjust -= time_adjust_step;
 804         }
 805         else
 806             time_adjust_step = 0;
 807 
 808         if (xtime.tv_usec >= 1000000) {
 809             xtime.tv_usec -= 1000000;
 810             xtime.tv_sec++;
 811             second_overflow();
 812         }
 813 
 814         jiffies++;
 815         calc_load();
 816 #ifdef  __SMP_PROF__
 817         smp_idle_count[NR_CPUS]++;    /* count timer ticks */
 818         cpu = smp_processor_id();
 819         for (i=0;i<(0==smp_num_cpus?1:smp_num_cpus);i++) 
 820                 if (test_bit(i,&smp_idle_map)) smp_idle_count[i]++;
 821 #endif
 822         if (user_mode(regs)) {
 823                 current->utime++;
 824                 if (current->pid) {
 825                         if (current->priority < DEF_PRIORITY)
 826                                 kstat.cpu_nice++;
 827                         else
 828                                 kstat.cpu_user++;
 829                 }
 830                 /* Update ITIMER_VIRT for current task if not in a system call */
 831                 if (current->it_virt_value && !(--current->it_virt_value)) {
 832                         current->it_virt_value = current->it_virt_incr;
 833                         send_sig(SIGVTALRM,current,1);
 834                 }
 835         } else {
 836                 current->stime++;
 837                 if(current->pid)
 838                         kstat.cpu_system++;
 839                 if (prof_buffer && current->pid) {
 840                         extern int _stext;
 841                         unsigned long ip = instruction_pointer(regs);
 842                         ip -= (unsigned long) &_stext;
 843                         ip >>= prof_shift;
 844                         if (ip < prof_len)
 845                                 prof_buffer[ip]++;
 846                 }
 847         }
 848         /*
 849          * check the cpu time limit on the process.
 850          */
 851         if ((current->rlim[RLIMIT_CPU].rlim_max != RLIM_INFINITY) &&
 852             (((current->stime + current->utime) / HZ) >= current->rlim[RLIMIT_CPU].rlim_max))
 853                 send_sig(SIGKILL, current, 1);
 854         if ((current->rlim[RLIMIT_CPU].rlim_cur != RLIM_INFINITY) &&
 855             (((current->stime + current->utime) % HZ) == 0)) {
 856                 psecs = (current->stime + current->utime) / HZ;
 857                 /* send when equal */
 858                 if (psecs == current->rlim[RLIMIT_CPU].rlim_cur)
 859                         send_sig(SIGXCPU, current, 1);
 860                 /* and every five seconds thereafter. */
 861                 else if ((psecs > current->rlim[RLIMIT_CPU].rlim_cur) &&
 862                         ((psecs - current->rlim[RLIMIT_CPU].rlim_cur) % 5) == 0)
 863                         send_sig(SIGXCPU, current, 1);
 864         }
 865 
 866         if (current->pid && 0 > --current->counter) {
 867                 current->counter = 0;
 868                 need_resched = 1;
 869         }
 870         /* Update ITIMER_PROF for the current task */
 871         if (current->it_prof_value && !(--current->it_prof_value)) {
 872                 current->it_prof_value = current->it_prof_incr;
 873                 send_sig(SIGPROF,current,1);
 874         }
 875         for (mask = 1, tp = timer_table+0 ; mask ; tp++,mask += mask) {
 876                 if (mask > timer_active)
 877                         break;
 878                 if (!(mask & timer_active))
 879                         continue;
 880                 if (tp->expires > jiffies)
 881                         continue;
 882                 mark_bh(TIMER_BH);
 883         }
 884         cli();
 885         if (timer_head.next->expires <= jiffies)
 886                 mark_bh(TIMER_BH);
 887         if (tq_timer != &tq_last)
 888                 mark_bh(TQUEUE_BH);
 889         sti();
 890 }
 891 
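/*
 * Worked example of the RLIMIT_CPU checks above (a sketch): with
 * rlim_cur == 10 and rlim_max == 30 (CPU seconds), the task receives
 * SIGXCPU at 10 seconds of stime+utime, again at 15, 20 and 25, and
 * SIGKILL once 30 seconds are reached.  The "% HZ == 0" guard makes
 * the soft-limit test fire once per accumulated CPU second instead
 * of on every tick.
 */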
 892 #ifndef __alpha__
 893 
 894 /*
 895  * For backwards compatibility?  This can be done in libc so Alpha
 896  * and all newer ports shouldn't need it.
 897  */
 898 asmlinkage unsigned int sys_alarm(unsigned int seconds)
 899 {
 900         struct itimerval it_new, it_old;
 901         unsigned int oldalarm;
 902 
 903         it_new.it_interval.tv_sec = it_new.it_interval.tv_usec = 0;
 904         it_new.it_value.tv_sec = seconds;
 905         it_new.it_value.tv_usec = 0;
 906         _setitimer(ITIMER_REAL, &it_new, &it_old);
 907         oldalarm = it_old.it_value.tv_sec;
 908         /* ehhh.. We can't return 0 if we have an alarm pending.. */
 909         /* And we'd better return too much than too little anyway */
 910         if (it_old.it_value.tv_usec)
 911                 oldalarm++;
 912         return oldalarm;
 913 }
 914 
 915 /*
 916  * The Alpha uses getxpid, getxuid, and getxgid instead.  Maybe this
 917  * should be moved into arch/i386 instead?
 918  */
 919 asmlinkage int sys_getpid(void)
 920 {
 921         return current->pid;
 922 }
 923 
 924 asmlinkage int sys_getppid(void)
 925 {
 926         return current->p_opptr->pid;
 927 }
 928 
 929 asmlinkage int sys_getuid(void)
 930 {
 931         return current->uid;
 932 }
 933 
 934 asmlinkage int sys_geteuid(void)
 935 {
 936         return current->euid;
 937 }
 938 
 939 asmlinkage int sys_getgid(void)
 940 {
 941         return current->gid;
 942 }
 943 
 944 asmlinkage int sys_getegid(void)
 945 {
 946         return current->egid;
 947 }
 948 
 949 /*
 950  * This has been replaced by sys_setpriority.  Maybe it should be
  951  * moved into the arch-dependent tree for those ports that require
 952  * it for backward compatibility?
 953  */
 954 asmlinkage int sys_nice(int increment)
 955 {
 956         unsigned long newprio;
 957         int increase = 0;
 958 
 959         newprio = increment;
 960         if (increment < 0) {
 961                 if (!suser())
 962                         return -EPERM;
 963                 newprio = -increment;
 964                 increase = 1;
 965         }
 966         if (newprio > 40)
 967                 newprio = 40;
 968         /*
 969          * do a "normalization" of the priority (traditionally
 970          * unix nice values are -20..20, linux doesn't really
 971          * use that kind of thing, but uses the length of the
 972          * timeslice instead (default 150 msec). The rounding is
 973          * why we want to avoid negative values.
 974          */
 975         newprio = (newprio * DEF_PRIORITY + 10) / 20;
 976         increment = newprio;
 977         if (increase)
 978                 increment = -increment;
 979         newprio = current->priority - increment;
 980         if (newprio < 1)
 981                 newprio = 1;
 982         if (newprio > DEF_PRIORITY*2)
 983                 newprio = DEF_PRIORITY*2;
 984         current->priority = newprio;
 985         return 0;
 986 }
 987 
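/*
 * Worked example of the normalization above (a sketch, assuming
 * HZ==100 so DEF_PRIORITY is 20 ticks): nice(10) yields newprio =
 * (10*20 + 10)/20 == 10, so the caller's timeslice shrinks by 10
 * ticks (100 msec); nice(-10) from the superuser grows it by the
 * same amount.  The final value is clamped to 1..2*DEF_PRIORITY,
 * i.e. a timeslice between 10 msec and 400 msec.
 */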
 988 #endif
 989 
 990 static struct task_struct *find_process_by_pid(pid_t pid) {
 991         struct task_struct *p, *q;
 992 
 993         if (pid == 0)
 994                 p = current;
 995         else {
 996                 p = 0;
 997                 for_each_task(q) {
 998                         if (q && q->pid == pid) {
 999                                 p = q;
1000                                 break;
1001                         }
1002                 }
1003         }
1004         return p;
1005 }
1006 
1007 static int setscheduler(pid_t pid, int policy, 
1008                         struct sched_param *param)
1009 {
1010         int error;
1011         struct sched_param lp;
1012         struct task_struct *p;
1013 
1014         if (!param || pid < 0)
1015                 return -EINVAL;
1016 
1017         error = verify_area(VERIFY_READ, param, sizeof(struct sched_param));
1018         if (error)
1019                 return error;
1020         memcpy_fromfs(&lp, param, sizeof(struct sched_param));
1021 
1022         p = find_process_by_pid(pid);
1023         if (!p)
1024                 return -ESRCH;
1025                         
1026         if (policy < 0)
1027                 policy = p->policy;
1028         else if (policy != SCHED_FIFO && policy != SCHED_RR &&
1029                  policy != SCHED_OTHER)
1030                 return -EINVAL;
1031         
1032         /*
1033          * Valid priorities for SCHED_FIFO and SCHED_RR are 1..99, valid
1034          * priority for SCHED_OTHER is 0.
1035          */
1036         if (lp.sched_priority < 0 || lp.sched_priority > 99)
1037                 return -EINVAL;
1038         if ((policy == SCHED_OTHER) != (lp.sched_priority == 0))
1039                 return -EINVAL;
1040 
1041         if ((policy == SCHED_FIFO || policy == SCHED_RR) && !suser())
1042                 return -EPERM;
1043         if ((current->euid != p->euid) && (current->euid != p->uid) &&
1044             !suser())
1045                 return -EPERM;
1046 
1047         p->policy = policy;
1048         p->rt_priority = lp.sched_priority;
1049         schedule();
1050 
1051         return 0;
1052 }
1053 
1054 asmlinkage int sys_sched_setscheduler(pid_t pid, int policy, 
1055                                       struct sched_param *param)
1056 {
1057         return setscheduler(pid, policy, param);
1058 }
1059 
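/*
 * User-space view (a sketch of the POSIX interface implemented here;
 * error handling abbreviated): switching the calling process to
 * real-time round-robin at priority 50, which per the suser() checks
 * in setscheduler() requires superuser privilege:
 *
 *      #include <sched.h>
 *
 *      struct sched_param sp;
 *
 *      sp.sched_priority = 50;
 *      if (sched_setscheduler(0, SCHED_RR, &sp) < 0)
 *              perror("sched_setscheduler");
 *
 * A pid of 0 means "the caller", matching find_process_by_pid()
 * above.
 */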
1060 asmlinkage int sys_sched_setparam(pid_t pid, struct sched_param *param)
1061 {
1062         return setscheduler(pid, -1, param);
1063 }
1064 
1065 asmlinkage int sys_sched_getscheduler(pid_t pid)
1066 {
1067         struct task_struct *p;
1068 
1069         if (pid < 0)
1070                 return -EINVAL;
1071 
1072         p = find_process_by_pid(pid);
1073         if (!p)
1074                 return -ESRCH;
1075                         
1076         return p->policy;
1077 }
1078 
1079 asmlinkage int sys_sched_getparam(pid_t pid, struct sched_param *param)
1080 {
1081         int error;
1082         struct task_struct *p;
1083         struct sched_param lp;
1084 
1085         if (!param || pid < 0)
1086                 return -EINVAL;
1087 
1088         error = verify_area(VERIFY_WRITE, param, sizeof(struct sched_param));
1089         if (error)
1090                 return error;
1091 
1092         p = find_process_by_pid(pid);
1093         if (!p)
1094                 return -ESRCH;
1095 
1096         lp.sched_priority = p->rt_priority;
1097         memcpy_tofs(param, &lp, sizeof(struct sched_param));
1098 
1099         return 0;
1100 }
1101 
1102 asmlinkage int sys_sched_yield(void)
1103 {
1104         /* ... not yet implemented ... */
1105         return -ENOSYS;
1106 }
1107 
1108 asmlinkage int sys_sched_get_priority_max(int policy)
1109 {
1110         switch (policy) {
1111               case SCHED_FIFO:
1112               case SCHED_RR:
1113                 return 99;
1114               case SCHED_OTHER:
1115                 return 0;
1116         }
1117 
1118         return -EINVAL;
1119 }
1120 
1121 asmlinkage int sys_sched_get_priority_min(int policy)
1122 {
1123         switch (policy) {
1124               case SCHED_FIFO:
1125               case SCHED_RR:
1126                 return 1;
1127               case SCHED_OTHER:
1128                 return 0;
1129         }
1130 
1131         return -EINVAL;
1132 }
1133 
1134 asmlinkage int sys_sched_rr_get_interval(pid_t pid, struct timespec *interval)
1135 {
1136         int error;
1137         struct timespec t;
1138 
1139         error = verify_area(VERIFY_WRITE, interval, sizeof(struct timespec));
1140         if (error)
1141                 return error;
1142         
1143         t.tv_sec = 0;
1144         t.tv_nsec = 0;   /* <-- Linus, please fill correct value in here */
1145         return -ENOSYS;  /* and then delete this line. Thanks!           */
1146         memcpy_tofs(interval, &t, sizeof(struct timespec));
1147 
1148         return 0;
1149 }
1150 
1151 static void show_task(int nr,struct task_struct * p)
1152 {
1153         unsigned long free;
1154         static const char * stat_nam[] = { "R", "S", "D", "Z", "T", "W" };
1155 
1156         printk("%-8s %3d ", p->comm, (p == current) ? -nr : nr);
1157         if (((unsigned) p->state) < sizeof(stat_nam)/sizeof(char *))
1158                 printk(stat_nam[p->state]);
1159         else
1160                 printk(" ");
1161 #if ((~0UL) == 0xffffffff)
1162         if (p == current)
1163                 printk(" current  ");
1164         else
1165                 printk(" %08lX ", thread_saved_pc(&p->tss));
1166 #else
1167         if (p == current)
1168                 printk("   current task   ");
1169         else
1170                 printk(" %016lx ", thread_saved_pc(&p->tss));
1171 #endif
1172         for (free = 1; free < PAGE_SIZE/sizeof(long) ; free++) {
1173                 if (((unsigned long *)p->kernel_stack_page)[free])
1174                         break;
1175         }
1176         printk("%5lu %5d %6d ", free*sizeof(long), p->pid, p->p_pptr->pid);
1177         if (p->p_cptr)
1178                 printk("%5d ", p->p_cptr->pid);
1179         else
1180                 printk("      ");
1181         if (p->p_ysptr)
1182                 printk("%7d", p->p_ysptr->pid);
1183         else
1184                 printk("       ");
1185         if (p->p_osptr)
1186                 printk(" %5d\n", p->p_osptr->pid);
1187         else
1188                 printk("\n");
1189 }
1190 
1191 void show_state(void)
1192 {
1193         int i;
1194 
1195 #if ((~0UL) == 0xffffffff)
1196         printk("\n"
1197                "                         free                        sibling\n");
1198         printk("  task             PC    stack   pid father child younger older\n");
1199 #else
1200         printk("\n"
1201                "                                 free                        sibling\n");
1202         printk("  task                 PC        stack   pid father child younger older\n");
1203 #endif
1204         for (i=0 ; i<NR_TASKS ; i++)
1205                 if (task[i])
1206                         show_task(i,task[i]);
1207 }
1208 
1209 void sched_init(void)
1210 {
1211         /*
1212          *      We have to do a little magic to get the first
1213          *      process right in SMP mode.
1214          */
1215         int cpu=smp_processor_id();
1216         current_set[cpu]=&init_task;
1217 #ifdef __SMP__  
1218         init_task.processor=cpu;
1219 #endif
1220         bh_base[TIMER_BH].routine = timer_bh;
1221         bh_base[TQUEUE_BH].routine = tqueue_bh;
1222         bh_base[IMMEDIATE_BH].routine = immediate_bh;
1223         enable_bh(TIMER_BH);
1224         enable_bh(TQUEUE_BH);
1225         enable_bh(IMMEDIATE_BH);
1226 }
