root/kernel/sched.c


DEFINITIONS

This source file includes the following definitions:
  1. add_to_runqueue
  2. del_from_runqueue
  3. move_last_runqueue
  4. wake_up_process
  5. process_timeout
  6. goodness
  7. schedule
  8. sys_pause
  9. wake_up
  10. wake_up_interruptible
  11. __down
  12. __sleep_on
  13. interruptible_sleep_on
  14. sleep_on
  15. add_timer
  16. del_timer
  17. count_active_tasks
  18. calc_load
  19. second_overflow
  20. timer_bh
  21. tqueue_bh
  22. immediate_bh
  23. do_timer
  24. sys_alarm
  25. sys_getpid
  26. sys_getppid
  27. sys_getuid
  28. sys_geteuid
  29. sys_getgid
  30. sys_getegid
  31. sys_nice
  32. find_process_by_pid
  33. setscheduler
  34. sys_sched_setscheduler
  35. sys_sched_setparam
  36. sys_sched_getscheduler
  37. sys_sched_getparam
  38. sys_sched_yield
  39. sys_sched_get_priority_max
  40. sys_sched_get_priority_min
  41. sys_sched_rr_get_interval
  42. show_task
  43. show_state
  44. sched_init

/*
 *  linux/kernel/sched.c
 *
 *  Copyright (C) 1991, 1992  Linus Torvalds
 */

/*
 * 'sched.c' is the main kernel file. It contains scheduling primitives
 * (sleep_on, wakeup, schedule etc) as well as a number of simple system
 * call functions (of the type getpid(), which just extracts a field from
 * the current task).
 */

#include <linux/signal.h>
#include <linux/sched.h>
#include <linux/timer.h>
#include <linux/kernel.h>
#include <linux/kernel_stat.h>
#include <linux/fdreg.h>
#include <linux/errno.h>
#include <linux/time.h>
#include <linux/ptrace.h>
#include <linux/delay.h>
#include <linux/interrupt.h>
#include <linux/tqueue.h>
#include <linux/resource.h>
#include <linux/mm.h>
#include <linux/smp.h>

#include <asm/system.h>
#include <asm/io.h>
#include <asm/segment.h>
#include <asm/pgtable.h>
#include <asm/mmu_context.h>

#include <linux/timex.h>

/*
 * kernel variables
 */

int securelevel = 0;                    /* system security level */

long tick = 1000000 / HZ;               /* timer interrupt period */
volatile struct timeval xtime;          /* The current time */
int tickadj = 500/HZ;                   /* microsecs */

DECLARE_TASK_QUEUE(tq_timer);
DECLARE_TASK_QUEUE(tq_immediate);
DECLARE_TASK_QUEUE(tq_scheduler);

/*
 * phase-lock loop variables
 */
int time_state = TIME_BAD;     /* clock synchronization status */
int time_status = STA_UNSYNC | STA_PLL; /* clock status bits */
long time_offset = 0;           /* time adjustment (us) */
long time_constant = 2;         /* pll time constant */
long time_tolerance = MAXFREQ;  /* frequency tolerance (ppm) */
long time_precision = 1;        /* clock precision (us) */
long time_maxerror = 0x70000000;/* maximum error */
long time_esterror = 0x70000000;/* estimated error */
long time_phase = 0;            /* phase offset (scaled us) */
long time_freq = 0;             /* frequency offset (scaled ppm) */
long time_adj = 0;              /* tick adjust (scaled 1 / HZ) */
long time_reftime = 0;          /* time at last adjustment (s) */

long time_adjust = 0;
long time_adjust_step = 0;

int need_resched = 0;
unsigned long event = 0;

extern int _setitimer(int, struct itimerval *, struct itimerval *);
unsigned long * prof_buffer = NULL;
unsigned long prof_len = 0;
unsigned long prof_shift = 0;

#define _S(nr) (1<<((nr)-1))

extern void mem_use(void);

static unsigned long init_kernel_stack[1024] = { STACK_MAGIC, };
unsigned long init_user_stack[1024] = { STACK_MAGIC, };
static struct vm_area_struct init_mmap = INIT_MMAP;
static struct fs_struct init_fs = INIT_FS;
static struct files_struct init_files = INIT_FILES;
static struct signal_struct init_signals = INIT_SIGNALS;

struct mm_struct init_mm = INIT_MM;
struct task_struct init_task = INIT_TASK;

unsigned long volatile jiffies=0;

struct task_struct *current_set[NR_CPUS];
struct task_struct *last_task_used_math = NULL;

struct task_struct * task[NR_TASKS] = {&init_task, };

struct kernel_stat kstat = { 0 };

static inline void add_to_runqueue(struct task_struct * p)
{
#if 1   /* sanity tests */
        if (p->next_run || p->prev_run) {
                printk("task already on run-queue\n");
                return;
        }
#endif
        if (p->counter > current->counter + 3)
                need_resched = 1;
        nr_running++;
        (p->prev_run = init_task.prev_run)->next_run = p;
        p->next_run = &init_task;
        init_task.prev_run = p;
}

static inline void del_from_runqueue(struct task_struct * p)
{
        struct task_struct *next = p->next_run;
        struct task_struct *prev = p->prev_run;

#if 1   /* sanity tests */
        if (!next || !prev) {
                printk("task not on run-queue\n");
                return;
        }
#endif
        if (p == &init_task) {
                static int nr = 0;
                if (nr < 5) {
                        nr++;
                        printk("idle task may not sleep\n");
                }
                return;
        }
        nr_running--;
        next->prev_run = prev;
        prev->next_run = next;
        p->next_run = NULL;
        p->prev_run = NULL;
}

static inline void move_last_runqueue(struct task_struct * p)
{
        struct task_struct *next = p->next_run;
        struct task_struct *prev = p->prev_run;

        next->prev_run = prev;
        prev->next_run = next;
        (p->prev_run = init_task.prev_run)->next_run = p;
        p->next_run = &init_task;
        init_task.prev_run = p;
}

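/*
 * The run-queue is a circular doubly-linked list threaded through
 * next_run/prev_run, with init_task serving as both list head and
 * idle task. A minimal sketch of walking it (hypothetical debugging
 * helper, not part of the original file; assumes interrupts are
 * disabled so the ring cannot change underneath us):
 */
#if 0
static void dump_runqueue(void)
{
        struct task_struct * p;

        for (p = init_task.next_run; p != &init_task; p = p->next_run)
                printk("runnable: pid %d, counter %ld\n",
                        p->pid, p->counter);
}
#endif
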
/*
 * Wake up a process. Put it on the run-queue if it's not
 * already there.  The "current" process is always on the
 * run-queue (except when the actual re-schedule is in
 * progress), and as such you're allowed to do the simpler
 * "current->state = TASK_RUNNING" to mark yourself runnable
 * without the overhead of this.
 */
inline void wake_up_process(struct task_struct * p)
{
        unsigned long flags;

        save_flags(flags);
        cli();
        p->state = TASK_RUNNING;
        if (!p->next_run)
                add_to_runqueue(p);
        restore_flags(flags);
}

static void process_timeout(unsigned long __data)
{
        struct task_struct * p = (struct task_struct *) __data;

        p->timeout = 0;
        wake_up_process(p);
}

/*
 * This is the function that decides how desirable a process is..
 * You can weigh different processes against each other depending
 * on what CPU they've run on lately etc. to try to handle cache
 * and TLB miss penalties.
 *
 * Return values:
 *       -1000: never select this
 *           0: out of time, recalculate counters (but it might still be
 *              selected)
 *         +ve: "goodness" value (the larger, the better)
 *       +1000: realtime process, select this.
 */
static inline int goodness(struct task_struct * p, int this_cpu)
{
        int weight;

#ifdef __SMP__
        /* We are not permitted to run a task someone else is running */
        if (p->processor != NO_PROC_ID)
                return -1000;
#endif

        /*
         * Realtime process, select the first one on the
         * runqueue (taking priorities within processes
         * into account).
         */
        if (p->policy != SCHED_OTHER)
                return 1000 + p->rt_priority;

        /*
         * Give the process a first-approximation goodness value
         * according to the number of clock-ticks it has left.
         *
         * Don't do any other calculations if the time slice is
         * over..
         */
        weight = p->counter;
        if (weight) {

#ifdef __SMP__
                /* Give a largish advantage to the same processor...   */
                /* (this is equivalent to penalizing other processors) */
                if (p->last_processor == this_cpu)
                        weight += PROC_CHANGE_PENALTY;
#endif

                /* .. and a slight advantage to the current process */
                if (p == current)
                        weight += 1;
        }

        return weight;
}

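/*
 * Worked example of the weighting above: a SCHED_OTHER task with 10
 * ticks left that is also the current task scores 10 + 1 = 11; on
 * SMP, having last run on this CPU adds PROC_CHANGE_PENALTY on top.
 * Any realtime task outranks them all at 1000 + rt_priority, and an
 * exhausted task (counter == 0) scores exactly 0.
 */
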
/*
 *  'schedule()' is the scheduler function. It's a very simple and nice
 * scheduler: it's not perfect, but certainly works for most things.
 *
 * The goto is "interesting".
 *
 *   NOTE!!  Task 0 is the 'idle' task, which gets called when no other
 * tasks can run. It cannot be killed, and it cannot sleep. The 'state'
 * information in task[0] is never used.
 */
asmlinkage void schedule(void)
{
        int c;
        struct task_struct * p;
        struct task_struct * next;
        unsigned long timeout = 0;
        int this_cpu=smp_processor_id();

/* check alarm, wake up any interruptible tasks that have got a signal */

        if (intr_count) {
                printk("Aiee: scheduling in interrupt\n");
                return;
        }
        if (bh_active & bh_mask) {
                intr_count = 1;
                do_bottom_half();
                intr_count = 0;
        }
        run_task_queue(&tq_scheduler);

        need_resched = 0;
        cli();
        /* move an exhausted RR process to be last.. */
        if (!current->counter && current->policy == SCHED_RR) {
                current->counter = current->priority;
                move_last_runqueue(current);
        }
        switch (current->state) {
                case TASK_INTERRUPTIBLE:
                        if (current->signal & ~current->blocked)
                                goto makerunnable;
                        timeout = current->timeout;
                        if (timeout && (timeout <= jiffies)) {
                                current->timeout = 0;
                                timeout = 0;
                makerunnable:
                                current->state = TASK_RUNNING;
                                break;
                        }
                default:
                        del_from_runqueue(current);
                case TASK_RUNNING:
        }
        p = init_task.next_run;
        sti();

#ifdef __SMP__
        /*
         *      This is safe as we do not permit re-entry of schedule()
         */
        current->processor = NO_PROC_ID;
#endif

/*
 * Note! New tasks may appear on the run-queue during this, as
 * interrupts are enabled. However, they will be put on the front of
 * the list, so our list starting at "p" is essentially fixed.
 */
/* this is the scheduler proper: */
        c = -1000;
        next = &init_task;
        while (p != &init_task) {
                int weight = goodness(p, this_cpu);
                if (weight > c)
                        c = weight, next = p;
                p = p->next_run;
        }

        /* if all runnable processes have "counter == 0", re-calculate counters */
        if (!c) {
                for_each_task(p)
                        p->counter = (p->counter >> 1) + p->priority;
        }
#ifdef __SMP__

        /*
         *      Context switching between two idle threads is pointless.
         */
        if(!current->pid && !next->pid)
                next=current;
        /*
         *      Allocate process to CPU
         */

         next->processor = this_cpu;
         next->last_processor = this_cpu;

#endif
#ifdef __SMP_PROF__
        /* mark processor running an idle thread */
        if (0==next->pid)
                set_bit(this_cpu,&smp_idle_map);
        else
                clear_bit(this_cpu,&smp_idle_map);
#endif
        if (current != next) {
                struct timer_list timer;

                kstat.context_swtch++;
                if (timeout) {
                        init_timer(&timer);
                        timer.expires = timeout;
                        timer.data = (unsigned long) current;
                        timer.function = process_timeout;
                        add_timer(&timer);
                }
                get_mmu_context(next);
                switch_to(next);
                if (timeout)
                        del_timer(&timer);
        }
}

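/*
 * Note on the recalculation above: counter = (counter >> 1) + priority
 * converges toward twice the priority, so a task that keeps sleeping
 * can bank at most 2*priority ticks. Worked example (assuming
 * DEF_PRIORITY == 20): 20, 30, 35, 37, 38, 39, ... and never past 40.
 */
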
asmlinkage int sys_pause(void)
{
        current->state = TASK_INTERRUPTIBLE;
        schedule();
        return -ERESTARTNOHAND;
}

/*
 * wake_up doesn't wake up stopped processes - they have to be awakened
 * with signals or similar.
 *
 * Note that this doesn't need cli-sti pairs: interrupts may not change
 * the wait-queue structures directly, but only call wake_up() to wake
 * a process. The process itself must remove itself from the queue once
 * it has woken.
 */
void wake_up(struct wait_queue **q)
{
        struct wait_queue *tmp;
        struct task_struct * p;

        if (!q || !(tmp = *q))
                return;
        do {
                if ((p = tmp->task) != NULL) {
                        if ((p->state == TASK_UNINTERRUPTIBLE) ||
                            (p->state == TASK_INTERRUPTIBLE))
                                wake_up_process(p);
                }
                if (!tmp->next) {
                        printk("wait_queue is bad (eip = %p)\n",
                                __builtin_return_address(0));
                        printk("        q = %p\n",q);
                        printk("       *q = %p\n",*q);
                        printk("      tmp = %p\n",tmp);
                        break;
                }
                tmp = tmp->next;
        } while (tmp != *q);
}

void wake_up_interruptible(struct wait_queue **q)
{
        struct wait_queue *tmp;
        struct task_struct * p;

        if (!q || !(tmp = *q))
                return;
        do {
                if ((p = tmp->task) != NULL) {
                        if (p->state == TASK_INTERRUPTIBLE)
                                wake_up_process(p);
                }
                if (!tmp->next) {
                        printk("wait_queue is bad (eip = %p)\n",
                                __builtin_return_address(0));
                        printk("        q = %p\n",q);
                        printk("       *q = %p\n",*q);
                        printk("      tmp = %p\n",tmp);
                        break;
                }
                tmp = tmp->next;
        } while (tmp != *q);
}

void __down(struct semaphore * sem)
{
        struct wait_queue wait = { current, NULL };
        add_wait_queue(&sem->wait, &wait);
        current->state = TASK_UNINTERRUPTIBLE;
        while (sem->count <= 0) {
                schedule();
                current->state = TASK_UNINTERRUPTIBLE;
        }
        current->state = TASK_RUNNING;
        remove_wait_queue(&sem->wait, &wait);
}

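/*
 * __down() is only the contended slow path; callers normally go
 * through the down()/up() inlines. A sketch of typical usage,
 * assuming the usual down()/up() wrappers and MUTEX initializer
 * from <asm/semaphore.h>; "foo_sem" and the helper are hypothetical:
 */
#if 0
static struct semaphore foo_sem = MUTEX;        /* count starts at 1 */

static void foo_critical(void)
{
        down(&foo_sem);         /* falls into __down() and sleeps if held */
        /* ... touch the shared state ... */
        up(&foo_sem);           /* wakes a sleeper queued in __down() */
}
#endif
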
static inline void __sleep_on(struct wait_queue **p, int state)
{
        unsigned long flags;
        struct wait_queue wait = { current, NULL };

        if (!p)
                return;
        if (current == task[0])
                panic("task[0] trying to sleep");
        current->state = state;
        add_wait_queue(p, &wait);
        save_flags(flags);
        sti();
        schedule();
        remove_wait_queue(p, &wait);
        restore_flags(flags);
}

void interruptible_sleep_on(struct wait_queue **p)
{
        __sleep_on(p,TASK_INTERRUPTIBLE);
}

void sleep_on(struct wait_queue **p)
{
        __sleep_on(p,TASK_UNINTERRUPTIBLE);
}

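/*
 * Sketch of the usual pairing of sleep_on() with wake_up()
 * (hypothetical "foo_wait"/"foo_ready", not part of this file; the
 * condition must be re-tested in a loop, since wake_up() wakes every
 * sleeper on the queue):
 */
#if 0
static struct wait_queue * foo_wait = NULL;
static int foo_ready = 0;

static void foo_consume(void)
{
        while (!foo_ready)
                sleep_on(&foo_wait);    /* TASK_UNINTERRUPTIBLE */
        foo_ready = 0;
}

static void foo_produce(void)           /* e.g. from an interrupt handler */
{
        foo_ready = 1;
        wake_up(&foo_wait);
}
#endif
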
/*
 * The head for the timer-list has an "expires" field of MAX_UINT,
 * and the sorting routine counts on this..
 */
static struct timer_list timer_head = { &timer_head, &timer_head, ~0, 0, NULL };
#define SLOW_BUT_DEBUGGING_TIMERS 0

void add_timer(struct timer_list * timer)
{
        unsigned long flags;
        struct timer_list *p;

#if SLOW_BUT_DEBUGGING_TIMERS
        if (timer->next || timer->prev) {
                printk("add_timer() called with non-zero list from %p\n",
                        __builtin_return_address(0));
                return;
        }
#endif
        p = &timer_head;
        save_flags(flags);
        cli();
        do {
                p = p->next;
        } while (timer->expires > p->expires);
        timer->next = p;
        timer->prev = p->prev;
        p->prev = timer;
        timer->prev->next = timer;
        restore_flags(flags);
}

int del_timer(struct timer_list * timer)
{
        unsigned long flags;
#if SLOW_BUT_DEBUGGING_TIMERS
        struct timer_list * p;

        p = &timer_head;
        save_flags(flags);
        cli();
        while ((p = p->next) != &timer_head) {
                if (p == timer) {
                        timer->next->prev = timer->prev;
                        timer->prev->next = timer->next;
                        timer->next = timer->prev = NULL;
                        restore_flags(flags);
                        return 1;
                }
        }
        if (timer->next || timer->prev)
                printk("del_timer() called from %p with timer not initialized\n",
                        __builtin_return_address(0));
        restore_flags(flags);
        return 0;
#else
        struct timer_list * next;
        int ret = 0;
        save_flags(flags);
        cli();
        if ((next = timer->next) != NULL) {
                (next->prev = timer->prev)->next = next;
                timer->next = timer->prev = NULL;
                ret = 1;
        }
        restore_flags(flags);
        return ret;
#endif
}

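/*
 * Sketch of how a driver would use the dynamic timers above
 * (hypothetical callback and helper, not part of this file; note
 * that "expires" is an absolute jiffies value, so callers add the
 * delay to the current jiffies):
 */
#if 0
static struct timer_list foo_timer;

static void foo_timeout(unsigned long data)
{
        printk("foo timer fired, data %lu\n", data);
}

static void foo_arm(void)
{
        init_timer(&foo_timer);
        foo_timer.expires = jiffies + 2*HZ;     /* about two seconds */
        foo_timer.data = 0;
        foo_timer.function = foo_timeout;
        add_timer(&foo_timer);                  /* del_timer() cancels it */
}
#endif
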
unsigned long timer_active = 0;
struct timer_struct timer_table[32];

/*
 * Hmm.. Changed this, as the GNU make sources (load.c) seem to
 * imply that avenrun[] is the standard name for this kind of thing.
 * Nothing else seems to be standardized: the fractional size etc.
 * all seem to differ on different machines.
 */
unsigned long avenrun[3] = { 0,0,0 };

/*
 * Nr of active tasks - counted in fixed-point numbers
 */
static unsigned long count_active_tasks(void)
{
        struct task_struct **p;
        unsigned long nr = 0;

        for(p = &LAST_TASK; p > &FIRST_TASK; --p)
                if (*p && ((*p)->state == TASK_RUNNING ||
                           (*p)->state == TASK_UNINTERRUPTIBLE ||
                           (*p)->state == TASK_SWAPPING))
                        nr += FIXED_1;
#ifdef __SMP__
        nr-=(smp_num_cpus-1)*FIXED_1;
#endif
        return nr;
}

static inline void calc_load(void)
{
        unsigned long active_tasks; /* fixed-point */
        static int count = LOAD_FREQ;

        if (count-- > 0)
                return;
        count = LOAD_FREQ;
        active_tasks = count_active_tasks();
        CALC_LOAD(avenrun[0], EXP_1, active_tasks);
        CALC_LOAD(avenrun[1], EXP_5, active_tasks);
        CALC_LOAD(avenrun[2], EXP_15, active_tasks);
}

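/*
 * avenrun[] holds fixed-point numbers with FIXED_1 representing 1.0.
 * A sketch of decoding them into the familiar "0.42"-style figures,
 * much as the /proc loadavg code does (the macro names below are
 * hypothetical):
 */
#if 0
#define EX_LOAD_INT(x)  ((x) >> FSHIFT)
#define EX_LOAD_FRAC(x) EX_LOAD_INT(((x) & (FIXED_1 - 1)) * 100)

static void show_loadavg(void)
{
        printk("load average: %lu.%02lu\n",
                EX_LOAD_INT(avenrun[0]), EX_LOAD_FRAC(avenrun[0]));
}
#endif
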
/*
 * this routine handles the overflow of the microsecond field
 *
 * The tricky bits of code to handle the accurate clock support
 * were provided by Dave Mills (Mills@UDEL.EDU) of NTP fame.
 * They were originally developed for SUN and DEC kernels.
 * All the kudos should go to Dave for this stuff.
 *
 */
static void second_overflow(void)
{
    long ltemp;

    /* Bump the maxerror field */
    time_maxerror = (0x70000000-time_maxerror <
                     time_tolerance >> SHIFT_USEC) ?
        0x70000000 : (time_maxerror + (time_tolerance >> SHIFT_USEC));

    /*
     * Leap second processing. If in leap-insert state at
     * the end of the day, the system clock is set back one
     * second; if in leap-delete state, the system clock is
     * set ahead one second. The microtime() routine or
     * external clock driver will ensure that reported time
     * is always monotonic. The ugly divides should be
     * replaced.
     */
    switch (time_state) {

    case TIME_OK:
        if (time_status & STA_INS)
            time_state = TIME_INS;
        else if (time_status & STA_DEL)
            time_state = TIME_DEL;
        break;

    case TIME_INS:
        if (xtime.tv_sec % 86400 == 0) {
            xtime.tv_sec--;
            time_state = TIME_OOP;
            printk("Clock: inserting leap second 23:59:60 UTC\n");
        }
        break;

    case TIME_DEL:
        if ((xtime.tv_sec + 1) % 86400 == 0) {
            xtime.tv_sec++;
            time_state = TIME_WAIT;
            printk("Clock: deleting leap second 23:59:59 UTC\n");
        }
        break;

    case TIME_OOP:

        time_state = TIME_WAIT;
        break;

    case TIME_WAIT:
        if (!(time_status & (STA_INS | STA_DEL)))
            time_state = TIME_OK;
    }

    /*
     * Compute the phase adjustment for the next second. In
     * PLL mode, the offset is reduced by a fixed factor
     * times the time constant. In FLL mode the offset is
     * used directly. In either mode, the maximum phase
     * adjustment for each second is clamped so as to spread
     * the adjustment over not more than the number of
     * seconds between updates.
     */
    if (time_offset < 0) {
        ltemp = -time_offset;
        if (!(time_status & STA_FLL))
            ltemp >>= SHIFT_KG + time_constant;
        if (ltemp > (MAXPHASE / MINSEC) << SHIFT_UPDATE)
            ltemp = (MAXPHASE / MINSEC) <<
                SHIFT_UPDATE;
        time_offset += ltemp;
        time_adj = -ltemp << (SHIFT_SCALE - SHIFT_HZ -
                              SHIFT_UPDATE);
    } else {
        ltemp = time_offset;
        if (!(time_status & STA_FLL))
            ltemp >>= SHIFT_KG + time_constant;
        if (ltemp > (MAXPHASE / MINSEC) << SHIFT_UPDATE)
            ltemp = (MAXPHASE / MINSEC) <<
                SHIFT_UPDATE;
        time_offset -= ltemp;
        time_adj = ltemp << (SHIFT_SCALE - SHIFT_HZ -
                             SHIFT_UPDATE);
    }

    /*
     * Compute the frequency estimate and additional phase
     * adjustment due to frequency error for the next
     * second. When the PPS signal is engaged, gnaw on the
     * watchdog counter and update the frequency computed by
     * the pll and the PPS signal.
     */
    pps_valid++;
    if (pps_valid == PPS_VALID) {
        pps_jitter = MAXTIME;
        pps_stabil = MAXFREQ;
        time_status &= ~(STA_PPSSIGNAL | STA_PPSJITTER |
                         STA_PPSWANDER | STA_PPSERROR);
    }
    ltemp = time_freq + pps_freq;
    if (ltemp < 0)
        time_adj -= -ltemp >>
            (SHIFT_USEC + SHIFT_HZ - SHIFT_SCALE);
    else
        time_adj += ltemp >>
            (SHIFT_USEC + SHIFT_HZ - SHIFT_SCALE);

#if HZ == 100
    /* compensate for (HZ==100) != 128. Add 25% to get 125; => only 3% error */
    if (time_adj < 0)
        time_adj -= -time_adj >> 2;
    else
        time_adj += time_adj >> 2;
#endif
}

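/*
 * Note on the HZ==100 compensation above: time_adj is computed with
 * SHIFT_HZ == 7, i.e. as if HZ were 128. Scaling by 1.25 (x + x>>2)
 * approximates the true ratio 128/100 = 1.28, which is where the
 * quoted ~3% residual error comes from.
 */
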
/*
 * disregard lost ticks for now.. We don't care enough.
 */
static void timer_bh(void * unused)
{
        unsigned long mask;
        struct timer_struct *tp;
        struct timer_list * timer;

        cli();
        while ((timer = timer_head.next) != &timer_head && timer->expires <= jiffies) {
                void (*fn)(unsigned long) = timer->function;
                unsigned long data = timer->data;
                timer->next->prev = timer->prev;
                timer->prev->next = timer->next;
                timer->next = timer->prev = NULL;
                sti();
                fn(data);
                cli();
        }
        sti();

        for (mask = 1, tp = timer_table+0 ; mask ; tp++,mask += mask) {
                if (mask > timer_active)
                        break;
                if (!(mask & timer_active))
                        continue;
                if (tp->expires > jiffies)
                        continue;
                timer_active &= ~mask;
                tp->fn();
                sti();
        }
}

void tqueue_bh(void * unused)
{
        run_task_queue(&tq_timer);
}

void immediate_bh(void * unused)
{
        run_task_queue(&tq_immediate);
}

void do_timer(struct pt_regs * regs)
{
        unsigned long mask;
        struct timer_struct *tp;
        long ltemp, psecs;
#ifdef  __SMP_PROF__
        int cpu,i;
#endif

        /* Advance the phase; once it accumulates one or more whole
         * microseconds, fold them into this tick's update.
         */
        time_phase += time_adj;
        if (time_phase <= -FINEUSEC) {
                ltemp = -time_phase >> SHIFT_SCALE;
                time_phase += ltemp << SHIFT_SCALE;
                xtime.tv_usec += tick + time_adjust_step - ltemp;
        }
        else if (time_phase >= FINEUSEC) {
                ltemp = time_phase >> SHIFT_SCALE;
                time_phase -= ltemp << SHIFT_SCALE;
                xtime.tv_usec += tick + time_adjust_step + ltemp;
        } else
                xtime.tv_usec += tick + time_adjust_step;

        if (time_adjust) {
            /* We are doing an adjtime thing.
             *
             * Modify the value of the tick for next time.
             * Note that a positive delta means we want the clock
             * to run fast. This means that the tick should be bigger.
             *
             * Limit the amount of the step for *next* tick to be
             * in the range -tickadj .. +tickadj
             */
             if (time_adjust > tickadj)
               time_adjust_step = tickadj;
             else if (time_adjust < -tickadj)
               time_adjust_step = -tickadj;
             else
               time_adjust_step = time_adjust;

            /* Reduce by this step the amount of time left  */
            time_adjust -= time_adjust_step;
        }
        else
            time_adjust_step = 0;

        if (xtime.tv_usec >= 1000000) {
            xtime.tv_usec -= 1000000;
            xtime.tv_sec++;
            second_overflow();
        }

        jiffies++;
        calc_load();
#ifdef  __SMP_PROF__
        smp_idle_count[NR_CPUS]++;    /* count timer ticks */
        cpu = smp_processor_id();
        for (i=0;i<(0==smp_num_cpus?1:smp_num_cpus);i++)
                if (test_bit(i,&smp_idle_map)) smp_idle_count[i]++;
#endif
        if (user_mode(regs)) {
                current->utime++;
                if (current->pid) {
                        if (current->priority < DEF_PRIORITY)
                                kstat.cpu_nice++;
                        else
                                kstat.cpu_user++;
                }
                /* Update ITIMER_VIRT for current task if not in a system call */
                if (current->it_virt_value && !(--current->it_virt_value)) {
                        current->it_virt_value = current->it_virt_incr;
                        send_sig(SIGVTALRM,current,1);
                }
        } else {
                current->stime++;
                if(current->pid)
                        kstat.cpu_system++;
                if (prof_buffer && current->pid) {
                        extern int _stext;
                        unsigned long ip = instruction_pointer(regs);
                        ip -= (unsigned long) &_stext;
                        ip >>= prof_shift;
                        if (ip < prof_len)
                                prof_buffer[ip]++;
                }
        }
        /*
         * check the cpu time limit on the process.
         */
        if ((current->rlim[RLIMIT_CPU].rlim_max != RLIM_INFINITY) &&
            (((current->stime + current->utime) / HZ) >= current->rlim[RLIMIT_CPU].rlim_max))
                send_sig(SIGKILL, current, 1);
        if ((current->rlim[RLIMIT_CPU].rlim_cur != RLIM_INFINITY) &&
            (((current->stime + current->utime) % HZ) == 0)) {
                psecs = (current->stime + current->utime) / HZ;
                /* send when equal */
                if (psecs == current->rlim[RLIMIT_CPU].rlim_cur)
                        send_sig(SIGXCPU, current, 1);
                /* and every five seconds thereafter. */
                else if ((psecs > current->rlim[RLIMIT_CPU].rlim_cur) &&
                        ((psecs - current->rlim[RLIMIT_CPU].rlim_cur) % 5) == 0)
                        send_sig(SIGXCPU, current, 1);
        }

        if (current->pid && 0 > --current->counter) {
                current->counter = 0;
                need_resched = 1;
        }
        /* Update ITIMER_PROF for the current task */
        if (current->it_prof_value && !(--current->it_prof_value)) {
                current->it_prof_value = current->it_prof_incr;
                send_sig(SIGPROF,current,1);
        }
        for (mask = 1, tp = timer_table+0 ; mask ; tp++,mask += mask) {
                if (mask > timer_active)
                        break;
                if (!(mask & timer_active))
                        continue;
                if (tp->expires > jiffies)
                        continue;
                mark_bh(TIMER_BH);
        }
        cli();
        if (timer_head.next->expires <= jiffies)
                mark_bh(TIMER_BH);
        if (tq_timer != &tq_last)
                mark_bh(TQUEUE_BH);
        sti();
}

asmlinkage unsigned int sys_alarm(unsigned int seconds)
{
        struct itimerval it_new, it_old;
        unsigned int oldalarm;

        it_new.it_interval.tv_sec = it_new.it_interval.tv_usec = 0;
        it_new.it_value.tv_sec = seconds;
        it_new.it_value.tv_usec = 0;
        _setitimer(ITIMER_REAL, &it_new, &it_old);
        oldalarm = it_old.it_value.tv_sec;
        /* ehhh.. We can't return 0 if we have an alarm pending.. */
        /* And we'd better return too much than too little anyway */
        if (it_old.it_value.tv_usec)
                oldalarm++;
        return oldalarm;
}

asmlinkage int sys_getpid(void)
{
        return current->pid;
}

asmlinkage int sys_getppid(void)
{
        return current->p_opptr->pid;
}

asmlinkage int sys_getuid(void)
{
        return current->uid;
}

asmlinkage int sys_geteuid(void)
{
        return current->euid;
}

asmlinkage int sys_getgid(void)
{
        return current->gid;
}

asmlinkage int sys_getegid(void)
{
        return current->egid;
}

asmlinkage int sys_nice(int increment)
{
        unsigned long newprio;
        int increase = 0;

        newprio = increment;
        if (increment < 0) {
                if (!suser())
                        return -EPERM;
                newprio = -increment;
                increase = 1;
        }
        if (newprio > 40)
                newprio = 40;
        /*
         * do a "normalization" of the priority (traditionally
         * unix nice values are -20..20, linux doesn't really
         * use that kind of thing, but uses the length of the
         * timeslice instead (default 150 msec)). The rounding is
         * why we want to avoid negative values.
         */
        newprio = (newprio * DEF_PRIORITY + 10) / 20;
        increment = newprio;
        if (increase)
                increment = -increment;
        newprio = current->priority - increment;
        if (newprio < 1)
                newprio = 1;
        if (newprio > DEF_PRIORITY*2)
                newprio = DEF_PRIORITY*2;
        current->priority = newprio;
        return 0;
}

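/*
 * Worked example of the mapping above (assuming HZ == 100, so
 * DEF_PRIORITY == 20): nice +10 gives (10*20 + 10)/20 = 10, i.e. the
 * timeslice shrinks by 10 ticks; nice -20 (root only) adds
 * (20*20 + 10)/20 = 20 ticks. The result is always clamped to
 * 1 .. DEF_PRIORITY*2.
 */
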
static struct task_struct *find_process_by_pid(pid_t pid)
{
        struct task_struct *p, *q;

        if (pid == 0)
                p = current;
        else {
                p = 0;
                for_each_task(q) {
                        if (q && q->pid == pid) {
                                p = q;
                                break;
                        }
                }
        }
        return p;
}

static int setscheduler(pid_t pid, int policy,
                        struct sched_param *param)
{
        int error;
        struct sched_param lp;
        struct task_struct *p;

        if (!param || pid < 0)
                return -EINVAL;

        error = verify_area(VERIFY_READ, param, sizeof(struct sched_param));
        if (error)
                return error;
        memcpy_fromfs(&lp, param, sizeof(struct sched_param));

        p = find_process_by_pid(pid);
        if (!p)
                return -ESRCH;

        if (policy < 0)
                policy = p->policy;
        else if (policy != SCHED_FIFO && policy != SCHED_RR &&
                 policy != SCHED_OTHER)
                return -EINVAL;

        /*
         * Valid priorities for SCHED_FIFO and SCHED_RR are 1..99, valid
         * priority for SCHED_OTHER is 0.
         */
        if (lp.sched_priority < 0 || lp.sched_priority > 99)
                return -EINVAL;
        if ((policy == SCHED_OTHER) != (lp.sched_priority == 0))
                return -EINVAL;

        if ((policy == SCHED_FIFO || policy == SCHED_RR) && !suser())
                return -EPERM;
        if ((current->euid != p->euid) && (current->euid != p->uid) &&
            !suser())
                return -EPERM;

        p->policy = policy;
        p->rt_priority = lp.sched_priority;
        schedule();

        return 0;
}

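/*
 * The syscalls below implement the POSIX.1b scheduling interface. A
 * sketch of the matching user-space call (hypothetical helper;
 * SCHED_FIFO/SCHED_RR require root, with priorities 1..99 as checked
 * above):
 */
#if 0
#include <sched.h>

int make_realtime(pid_t pid)
{
        struct sched_param sp;

        sp.sched_priority = 50;
        return sched_setscheduler(pid, SCHED_FIFO, &sp);
}
#endif
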
asmlinkage int sys_sched_setscheduler(pid_t pid, int policy,
                                      struct sched_param *param)
{
        return setscheduler(pid, policy, param);
}

asmlinkage int sys_sched_setparam(pid_t pid, struct sched_param *param)
{
        return setscheduler(pid, -1, param);
}

asmlinkage int sys_sched_getscheduler(pid_t pid)
{
        struct task_struct *p;

        if (pid < 0)
                return -EINVAL;

        p = find_process_by_pid(pid);
        if (!p)
                return -ESRCH;

        return p->policy;
}

asmlinkage int sys_sched_getparam(pid_t pid, struct sched_param *param)
{
        int error;
        struct task_struct *p;
        struct sched_param lp;

        if (!param || pid < 0)
                return -EINVAL;

        error = verify_area(VERIFY_WRITE, param, sizeof(struct sched_param));
        if (error)
                return error;

        p = find_process_by_pid(pid);
        if (!p)
                return -ESRCH;

        lp.sched_priority = p->rt_priority;
        memcpy_tofs(param, &lp, sizeof(struct sched_param));

        return 0;
}

asmlinkage int sys_sched_yield(void)
{
        /* ... not yet implemented ... */
        return -ENOSYS;
}

asmlinkage int sys_sched_get_priority_max(int policy)
{
        switch (policy) {
              case SCHED_FIFO:
              case SCHED_RR:
                return 99;
              case SCHED_OTHER:
                return 0;
        }

        return -EINVAL;
}

asmlinkage int sys_sched_get_priority_min(int policy)
{
        switch (policy) {
              case SCHED_FIFO:
              case SCHED_RR:
                return 1;
              case SCHED_OTHER:
                return 0;
        }

        return -EINVAL;
}

asmlinkage int sys_sched_rr_get_interval(pid_t pid, struct timespec *interval)
{
        int error;
        struct timespec t;

        error = verify_area(VERIFY_WRITE, interval, sizeof(struct timespec));
        if (error)
                return error;

        t.tv_sec = 0;
        t.tv_nsec = 0;   /* <-- Linus, please fill correct value in here */
        return -ENOSYS;  /* and then delete this line. Thanks!           */
        memcpy_tofs(interval, &t, sizeof(struct timespec));

        return 0;
}

static void show_task(int nr,struct task_struct * p)
{
        unsigned long free;
        static const char * stat_nam[] = { "R", "S", "D", "Z", "T", "W" };

        printk("%-8s %3d ", p->comm, (p == current) ? -nr : nr);
        if (((unsigned) p->state) < sizeof(stat_nam)/sizeof(char *))
                printk(stat_nam[p->state]);
        else
                printk(" ");
#if ((~0UL) == 0xffffffff)
        if (p == current)
                printk(" current  ");
        else
                printk(" %08lX ", thread_saved_pc(&p->tss));
#else
        if (p == current)
                printk("   current task   ");
        else
                printk(" %016lx ", thread_saved_pc(&p->tss));
#endif
        for (free = 1; free < PAGE_SIZE/sizeof(long) ; free++) {
                if (((unsigned long *)p->kernel_stack_page)[free])
                        break;
        }
        printk("%5lu %5d %6d ", free*sizeof(long), p->pid, p->p_pptr->pid);
        if (p->p_cptr)
                printk("%5d ", p->p_cptr->pid);
        else
                printk("      ");
        if (p->p_ysptr)
                printk("%7d", p->p_ysptr->pid);
        else
                printk("       ");
        if (p->p_osptr)
                printk(" %5d\n", p->p_osptr->pid);
        else
                printk("\n");
}

void show_state(void)
{
        int i;

#if ((~0UL) == 0xffffffff)
        printk("\n"
               "                         free                        sibling\n");
        printk("  task             PC    stack   pid father child younger older\n");
#else
        printk("\n"
               "                                 free                        sibling\n");
        printk("  task                 PC        stack   pid father child younger older\n");
#endif
        for (i=0 ; i<NR_TASKS ; i++)
                if (task[i])
                        show_task(i,task[i]);
}

void sched_init(void)
{
        /*
         *      We have to do a little magic to get the first
         *      process right in SMP mode.
         */
        int cpu=smp_processor_id();
        current_set[cpu]=&init_task;
#ifdef __SMP__
        init_task.processor=cpu;
#endif
        bh_base[TIMER_BH].routine = timer_bh;
        bh_base[TQUEUE_BH].routine = tqueue_bh;
        bh_base[IMMEDIATE_BH].routine = immediate_bh;
        enable_bh(TIMER_BH);
        enable_bh(TQUEUE_BH);
        enable_bh(IMMEDIATE_BH);
}