linux/kernel/sched.c


DEFINITIONS

This source file includes the following definitions:
  1. add_to_runqueue
  2. del_from_runqueue
  3. move_last_runqueue
  4. wake_up_process
  5. process_timeout
  6. goodness
  7. schedule
  8. sys_pause
  9. wake_up
  10. wake_up_interruptible
  11. __down
  12. __sleep_on
  13. interruptible_sleep_on
  14. sleep_on
  15. add_timer
  16. del_timer
  17. count_active_tasks
  18. calc_load
  19. second_overflow
  20. timer_bh
  21. tqueue_bh
  22. immediate_bh
  23. do_timer
  24. sys_alarm
  25. sys_getpid
  26. sys_getppid
  27. sys_getuid
  28. sys_geteuid
  29. sys_getgid
  30. sys_getegid
  31. sys_nice
  32. find_process_by_pid
  33. setscheduler
  34. sys_sched_setscheduler
  35. sys_sched_setparam
  36. sys_sched_getscheduler
  37. sys_sched_getparam
  38. sys_sched_yield
  39. sys_sched_get_priority_max
  40. sys_sched_get_priority_min
  41. sys_sched_rr_get_interval
  42. show_task
  43. show_state
  44. sched_init

   1 /*
   2  *  linux/kernel/sched.c
   3  *
   4  *  Copyright (C) 1991, 1992  Linus Torvalds
   5  */
   6 
   7 /*
   8  * 'sched.c' is the main kernel file. It contains scheduling primitives
   9  * (sleep_on, wake_up, schedule etc.) as well as a number of simple system
  10  * call functions (of the getpid() type, which just extract a field from
  11  * the current task)
  12  */
  13 
  14 #include <linux/signal.h>
  15 #include <linux/sched.h>
  16 #include <linux/timer.h>
  17 #include <linux/kernel.h>
  18 #include <linux/kernel_stat.h>
  19 #include <linux/fdreg.h>
  20 #include <linux/errno.h>
  21 #include <linux/time.h>
  22 #include <linux/ptrace.h>
  23 #include <linux/delay.h>
  24 #include <linux/interrupt.h>
  25 #include <linux/tqueue.h>
  26 #include <linux/resource.h>
  27 #include <linux/mm.h>
  28 #include <linux/smp.h>
  29 
  30 #include <asm/system.h>
  31 #include <asm/io.h>
  32 #include <asm/segment.h>
  33 #include <asm/pgtable.h>
  34 #include <asm/mmu_context.h>
  35 
  36 #include <linux/timex.h>
  37 
  38 /*
  39  * kernel variables
  40  */
  41 
  42 int securelevel = 0;                    /* system security level */
  43 
  44 long tick = 1000000 / HZ;               /* timer interrupt period */
  45 volatile struct timeval xtime;          /* The current time */
  46 int tickadj = 500/HZ;                   /* microsecs */
  47 
  48 DECLARE_TASK_QUEUE(tq_timer);
  49 DECLARE_TASK_QUEUE(tq_immediate);
  50 DECLARE_TASK_QUEUE(tq_scheduler);
  51 
  52 /*
  53  * phase-lock loop variables
  54  */
  55 int time_state = TIME_BAD;     /* clock synchronization status */
  56 int time_status = STA_UNSYNC | STA_PLL; /* clock status bits */
  57 long time_offset = 0;           /* time adjustment (us) */
  58 long time_constant = 2;         /* pll time constant */
  59 long time_tolerance = MAXFREQ;  /* frequency tolerance (ppm) */
  60 long time_precision = 1;        /* clock precision (us) */
  61 long time_maxerror = 0x70000000;/* maximum error */
  62 long time_esterror = 0x70000000;/* estimated error */
  63 long time_phase = 0;            /* phase offset (scaled us) */
  64 long time_freq = 0;             /* frequency offset (scaled ppm) */
  65 long time_adj = 0;              /* tick adjust (scaled 1 / HZ) */
  66 long time_reftime = 0;          /* time at last adjustment (s) */
  67 
  68 long time_adjust = 0;
  69 long time_adjust_step = 0;
  70 
  71 int need_resched = 0;
  72 unsigned long event = 0;
  73 
  74 extern int _setitimer(int, struct itimerval *, struct itimerval *);
  75 unsigned long * prof_buffer = NULL;
  76 unsigned long prof_len = 0;
  77 unsigned long prof_shift = 0;
  78 
  79 #define _S(nr) (1<<((nr)-1))
  80 
  81 extern void mem_use(void);
  82 
  83 static unsigned long init_kernel_stack[1024] = { STACK_MAGIC, };
  84 unsigned long init_user_stack[1024] = { STACK_MAGIC, };
  85 static struct vm_area_struct init_mmap = INIT_MMAP;
  86 static struct fs_struct init_fs = INIT_FS;
  87 static struct files_struct init_files = INIT_FILES;
  88 static struct signal_struct init_signals = INIT_SIGNALS;
  89 
  90 struct mm_struct init_mm = INIT_MM;
  91 struct task_struct init_task = INIT_TASK;
  92 
  93 unsigned long volatile jiffies=0;
  94 
  95 struct task_struct *current_set[NR_CPUS];
  96 struct task_struct *last_task_used_math = NULL;
  97 
  98 struct task_struct * task[NR_TASKS] = {&init_task, };
  99 
 100 struct kernel_stat kstat = { 0 };
 101 
 102 static inline void add_to_runqueue(struct task_struct * p)
 103 {
 104 #if 1   /* sanity tests */
 105         if (p->next_run || p->prev_run) {
 106                 printk("task already on run-queue\n");
 107                 return;
 108         }
 109 #endif
 110         if (p->counter > current->counter + 3)
 111                 need_resched = 1;
 112         nr_running++;
 113         (p->prev_run = init_task.prev_run)->next_run = p;
 114         p->next_run = &init_task;
 115         init_task.prev_run = p;
 116 }
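
/*
 * A note on the splice above (added, not in the original listing): the
 * run-queue is a circular doubly-linked list with init_task, the idle
 * task, as a permanent sentinel, so the three assignments insert p just
 * before init_task, i.e. at the tail.  The counter test lets a freshly
 * woken task with a clearly larger remaining timeslice force a
 * reschedule of the current one.
 */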
 117 
 118 static inline void del_from_runqueue(struct task_struct * p)
 119 {
 120         struct task_struct *next = p->next_run;
 121         struct task_struct *prev = p->prev_run;
 122 
 123 #if 1   /* sanity tests */
 124         if (!next || !prev) {
 125                 printk("task not on run-queue\n");
 126                 return;
 127         }
 128 #endif
 129         if (p == &init_task) {
 130                 static int nr = 0;
 131                 if (nr < 5) {
 132                         nr++;
 133                         printk("idle task may not sleep\n");
 134                 }
 135                 return;
 136         }
 137         nr_running--;
 138         next->prev_run = prev;
 139         prev->next_run = next;
 140         p->next_run = NULL;
 141         p->prev_run = NULL;
 142 }
 143 
 144 static inline void move_last_runqueue(struct task_struct * p)
 145 {
 146         struct task_struct *next = p->next_run;
 147         struct task_struct *prev = p->prev_run;
 148 
 149         next->prev_run = prev;
 150         prev->next_run = next;
 151         (p->prev_run = init_task.prev_run)->next_run = p;
 152         p->next_run = &init_task;
 153         init_task.prev_run = p;
 154 }
 155 
 156 /*
 157  * Wake up a process. Put it on the run-queue if it's not
 158  * already there.  The "current" process is always on the
 159  * run-queue (except when the actual re-schedule is in
 160  * progress), and as such you're allowed to do the simpler
 161  * "current->state = TASK_RUNNING" to mark yourself runnable
 162  * without the overhead of this.
 163  */
 164 inline void wake_up_process(struct task_struct * p)
 165 {
 166         unsigned long flags;
 167 
 168         save_flags(flags);
 169         cli();
 170         p->state = TASK_RUNNING;
 171         if (!p->next_run)
 172                 add_to_runqueue(p);
 173         restore_flags(flags);
 174 }
 175 
 176 static void process_timeout(unsigned long __data)
 177 {
 178         struct task_struct * p = (struct task_struct *) __data;
 179 
 180         p->timeout = 0;
 181         wake_up_process(p);
 182 }
 183 
 184 /*
  185  * This is the function that decides how desirable a process is..
  186  * You can weigh different processes against each other depending
  187  * on which CPU they've run on lately etc. to try to handle cache
  188  * and TLB miss penalties.
 189  *
 190  * Return values:
 191  *       -1000: never select this
 192  *           0: out of time, recalculate counters (but it might still be
 193  *              selected)
 194  *         +ve: "goodness" value (the larger, the better)
 195  *       +1000: realtime process, select this.
 196  */
 197 static inline int goodness(struct task_struct * p, int this_cpu)
 198 {
 199         int weight;
 200 
 201 #ifdef __SMP__  
 202         /* We are not permitted to run a task someone else is running */
 203         if (p->processor != NO_PROC_ID)
 204                 return -1000;
 205 #endif
 206 
 207         /*
 208          * Realtime process, select the first one on the
 209          * runqueue (taking priorities within processes
 210          * into account).
 211          */
 212         if (p->policy != SCHED_OTHER)
 213                 return 1000 + p->rt_priority;
 214 
 215         /*
 216          * Give the process a first-approximation goodness value
 217          * according to the number of clock-ticks it has left.
 218          *
 219          * Don't do any other calculations if the time slice is
 220          * over..
 221          */
 222         weight = p->counter;
 223         if (weight) {
 224                         
 225 #ifdef __SMP__
 226                 /* Give a largish advantage to the same processor...   */
 227                 /* (this is equivalent to penalizing other processors) */
 228                 if (p->last_processor == this_cpu)
 229                         weight += PROC_CHANGE_PENALTY;
 230 #endif
 231 
 232                 /* .. and a slight advantage to the current process */
 233                 if (p == current)
 234                         weight += 1;
 235         }
 236 
 237         return weight;
 238 }
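
/*
 * A worked example of the weighting above (added note, not in the
 * original listing), for a hypothetical mix of runnable tasks:
 *
 *      SCHED_FIFO task, rt_priority 50          -> 1000 + 50 = 1050
 *      SCHED_OTHER task, counter 15             ->   15
 *      the current task, counter 10             ->   10 + 1  =   11
 *      SCHED_OTHER task, counter 0              ->    0 (slice used up)
 *      (SMP) task still running on another CPU  -> -1000
 *
 * Any runnable realtime task therefore beats every SCHED_OTHER task,
 * and among the timesharing tasks the one with the most ticks left wins.
 */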
 239 
 240 /*
 241  *  'schedule()' is the scheduler function. It's a very simple and nice
 242  * scheduler: it's not perfect, but certainly works for most things.
 243  *
 244  * The goto is "interesting".
 245  *
 246  *   NOTE!!  Task 0 is the 'idle' task, which gets called when no other
  247  * tasks can run. It cannot be killed, and it cannot sleep. The 'state'
 248  * information in task[0] is never used.
 249  */
 250 asmlinkage void schedule(void)
 251 {
 252         int c;
 253         struct task_struct * p;
 254         struct task_struct * next;
 255         unsigned long timeout = 0;
 256         int this_cpu=smp_processor_id();
 257 
 258 /* check alarm, wake up any interruptible tasks that have got a signal */
 259 
 260         if (intr_count) {
 261                 printk("Aiee: scheduling in interrupt\n");
 262                 return;
 263         }
 264         run_task_queue(&tq_scheduler);
 265 
 266         need_resched = 0;
 267         cli();
 268         /* move an exhausted RR process to be last.. */
 269         if (!current->counter && current->policy == SCHED_RR) {
 270                 current->counter = current->priority;
 271                 move_last_runqueue(current);
 272         }
 273         switch (current->state) {
 274                 case TASK_INTERRUPTIBLE:
 275                         if (current->signal & ~current->blocked)
 276                                 goto makerunnable;
 277                         timeout = current->timeout;
 278                         if (timeout && (timeout <= jiffies)) {
 279                                 current->timeout = 0;
 280                                 timeout = 0;
 281                 makerunnable:
 282                                 current->state = TASK_RUNNING;
 283                                 break;
 284                         }
 285                 default:
 286                         del_from_runqueue(current);
  287                 case TASK_RUNNING: break;
 288         }
 289         p = init_task.next_run;
 290         sti();
 291         
 292 #ifdef __SMP__
 293         /*
 294          *      This is safe as we do not permit re-entry of schedule()
 295          */
 296         current->processor = NO_PROC_ID;        
 297 #endif  
 298 
 299 /*
 300  * Note! there may appear new tasks on the run-queue during this, as
  301  * interrupts are enabled. However, they will be added at the end of
  302  * the list, so our list starting at "p" is essentially fixed.
 303  */
 304 /* this is the scheduler proper: */
 305         c = -1000;
 306         next = &init_task;
 307         while (p != &init_task) {
 308                 int weight = goodness(p, this_cpu);
 309                 if (weight > c)
 310                         c = weight, next = p;
 311                 p = p->next_run;
 312         }
 313 
 314         /* if all runnable processes have "counter == 0", re-calculate counters */
 315         if (!c) {
 316                 for_each_task(p)
 317                         p->counter = (p->counter >> 1) + p->priority;
 318         }
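
        /*
         * Added note (not in the original listing): the recurrence
         * above, c = c/2 + priority, converges on 2*priority.  With
         * priority 20 a long-sleeping task's counter climbs
         * 20, 30, 35, 37, 38, 39, ... capped just below 40, so tasks
         * that sleep a lot bank at most about one extra timeslice.
         */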
 319 #ifdef __SMP__  
 320         
 321         /*
 322          *      Context switching between two idle threads is pointless.
 323          */
 324         if(!current->pid && !next->pid)
 325                 next=current;
 326         /*
 327          *      Allocate process to CPU
 328          */
 329          
 330          next->processor = this_cpu;
 331          next->last_processor = this_cpu;
 332          
 333 #endif   
 334         if (current != next) {
 335                 struct timer_list timer;
 336 
 337                 kstat.context_swtch++;
 338                 if (timeout) {
 339                         init_timer(&timer);
 340                         timer.expires = timeout;
 341                         timer.data = (unsigned long) current;
 342                         timer.function = process_timeout;
 343                         add_timer(&timer);
 344                 }
 345                 get_mmu_context(next);
 346                 switch_to(next);
 347                 if (timeout)
 348                         del_timer(&timer);
 349         }
 350 }
 351 
 352 asmlinkage int sys_pause(void)
 353 {
 354         current->state = TASK_INTERRUPTIBLE;
 355         schedule();
 356         return -ERESTARTNOHAND;
 357 }
 358 
 359 /*
 360  * wake_up doesn't wake up stopped processes - they have to be awakened
 361  * with signals or similar.
 362  *
 363  * Note that this doesn't need cli-sti pairs: interrupts may not change
 364  * the wait-queue structures directly, but only call wake_up() to wake
  365  * a process. The process must remove itself from the queue once it has woken.
 366  */
 367 void wake_up(struct wait_queue **q)
 368 {
 369         struct wait_queue *tmp;
 370         struct task_struct * p;
 371 
 372         if (!q || !(tmp = *q))
 373                 return;
 374         do {
 375                 if ((p = tmp->task) != NULL) {
 376                         if ((p->state == TASK_UNINTERRUPTIBLE) ||
 377                             (p->state == TASK_INTERRUPTIBLE))
 378                                 wake_up_process(p);
 379                 }
 380                 if (!tmp->next) {
 381                         printk("wait_queue is bad (eip = %p)\n",
 382                                 __builtin_return_address(0));
 383                         printk("        q = %p\n",q);
 384                         printk("       *q = %p\n",*q);
 385                         printk("      tmp = %p\n",tmp);
 386                         break;
 387                 }
 388                 tmp = tmp->next;
 389         } while (tmp != *q);
 390 }
 391 
 392 void wake_up_interruptible(struct wait_queue **q)
 393 {
 394         struct wait_queue *tmp;
 395         struct task_struct * p;
 396 
 397         if (!q || !(tmp = *q))
 398                 return;
 399         do {
 400                 if ((p = tmp->task) != NULL) {
 401                         if (p->state == TASK_INTERRUPTIBLE)
 402                                 wake_up_process(p);
 403                 }
 404                 if (!tmp->next) {
 405                         printk("wait_queue is bad (eip = %p)\n",
 406                                 __builtin_return_address(0));
 407                         printk("        q = %p\n",q);
 408                         printk("       *q = %p\n",*q);
 409                         printk("      tmp = %p\n",tmp);
 410                         break;
 411                 }
 412                 tmp = tmp->next;
 413         } while (tmp != *q);
 414 }
 415 
 416 void __down(struct semaphore * sem)
 417 {
 418         struct wait_queue wait = { current, NULL };
 419         add_wait_queue(&sem->wait, &wait);
 420         current->state = TASK_UNINTERRUPTIBLE;
 421         while (sem->count <= 0) {
 422                 schedule();
 423                 current->state = TASK_UNINTERRUPTIBLE;
 424         }
 425         current->state = TASK_RUNNING;
 426         remove_wait_queue(&sem->wait, &wait);
 427 }
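
/*
 * A minimal usage sketch (added, not in the original listing).  down()
 * and up() are the fast paths in <asm/semaphore.h>; down() only falls
 * into __down() when the optimistic decrement leaves sem->count <= 0.
 * The MUTEX initializer is assumed from that same header.
 */
#if 0
static struct semaphore my_sem = MUTEX;         /* count starts at 1 */

static void my_critical_section(void)
{
        down(&my_sem);          /* sleeps in __down() if already held */
        /* ... data protected by my_sem ... */
        up(&my_sem);            /* wakes a sleeper through sem->wait */
}
#endif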
 428 
 429 static inline void __sleep_on(struct wait_queue **p, int state)
 430 {
 431         unsigned long flags;
 432         struct wait_queue wait = { current, NULL };
 433 
 434         if (!p)
 435                 return;
 436         if (current == task[0])
 437                 panic("task[0] trying to sleep");
 438         current->state = state;
 439         add_wait_queue(p, &wait);
 440         save_flags(flags);
 441         sti();
 442         schedule();
 443         remove_wait_queue(p, &wait);
 444         restore_flags(flags);
 445 }
 446 
 447 void interruptible_sleep_on(struct wait_queue **p)
 448 {
 449         __sleep_on(p,TASK_INTERRUPTIBLE);
 450 }
 451 
 452 void sleep_on(struct wait_queue **p)
 453 {
 454         __sleep_on(p,TASK_UNINTERRUPTIBLE);
 455 }
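
/*
 * A minimal usage sketch of the primitives above (added, not in the
 * original listing; my_wait and my_data_ready are hypothetical).
 */
#if 0
static struct wait_queue * my_wait = NULL;
static volatile int my_data_ready = 0;

static void my_consumer(void)
{
        /* re-test the condition after waking: wake_up_interruptible()
         * wakes every sleeper, and another task may get there first */
        while (!my_data_ready)
                interruptible_sleep_on(&my_wait);
        my_data_ready = 0;
}

static void my_producer(void)   /* e.g. from an interrupt handler */
{
        my_data_ready = 1;
        wake_up_interruptible(&my_wait);
}
#endif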
 456 
 457 /*
  458  * The head for the timer-list has an "expires" field of MAX_UINT,
 459  * and the sorting routine counts on this..
 460  */
 461 static struct timer_list timer_head = { &timer_head, &timer_head, ~0, 0, NULL };
 462 #define SLOW_BUT_DEBUGGING_TIMERS 1
 463 
 464 void add_timer(struct timer_list * timer)
 465 {
 466         unsigned long flags;
 467         struct timer_list *p;
 468 
 469 #if SLOW_BUT_DEBUGGING_TIMERS
 470         if (timer->next || timer->prev) {
 471                 printk("add_timer() called with non-zero list from %p\n",
 472                         __builtin_return_address(0));
 473                 return;
 474         }
 475 #endif
 476         p = &timer_head;
 477         save_flags(flags);
 478         cli();
 479         do {
 480                 p = p->next;
 481         } while (timer->expires > p->expires);
 482         timer->next = p;
 483         timer->prev = p->prev;
 484         p->prev = timer;
 485         timer->prev->next = timer;
 486         restore_flags(flags);
 487 }
 488 
 489 int del_timer(struct timer_list * timer)
 490 {
 491         unsigned long flags;
 492 #if SLOW_BUT_DEBUGGING_TIMERS
 493         struct timer_list * p;
 494 
 495         p = &timer_head;
 496         save_flags(flags);
 497         cli();
 498         while ((p = p->next) != &timer_head) {
 499                 if (p == timer) {
 500                         timer->next->prev = timer->prev;
 501                         timer->prev->next = timer->next;
 502                         timer->next = timer->prev = NULL;
 503                         restore_flags(flags);
 504                         return 1;
 505                 }
 506         }
 507         if (timer->next || timer->prev)
 508                 printk("del_timer() called from %p with timer not initialized\n",
 509                         __builtin_return_address(0));
 510         restore_flags(flags);
 511         return 0;
 512 #else   
 513         save_flags(flags);
 514         cli();
 515         if (timer->next) {
 516                 timer->next->prev = timer->prev;
 517                 timer->prev->next = timer->next;
 518                 timer->next = timer->prev = NULL;
 519                 restore_flags(flags);
 520                 return 1;
 521         }
 522         restore_flags(flags);
 523         return 0;
 524 #endif
 525 }
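
/*
 * A minimal usage sketch of the dynamic timers above (added, not in the
 * original listing; my_device, my_timeout and start_my_timer are
 * hypothetical).  "expires" is an absolute jiffies value.
 */
#if 0
static void my_timeout(unsigned long data)
{
        struct my_device *dev = (struct my_device *) data;
        /* ... handle the timeout ... */
}

static void start_my_timer(struct my_device *dev)
{
        static struct timer_list my_timer;

        init_timer(&my_timer);                  /* clears next/prev */
        my_timer.expires = jiffies + 2*HZ;      /* fire in ~2 seconds */
        my_timer.data = (unsigned long) dev;
        my_timer.function = my_timeout;
        add_timer(&my_timer);
        /* del_timer(&my_timer) cancels it if it hasn't fired yet */
}
#endif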
 526 
 527 unsigned long timer_active = 0;
 528 struct timer_struct timer_table[32];
 529 
 530 /*
  531  * Hmm.. Changed this, as the GNU make sources (load.c) seem to
 532  * imply that avenrun[] is the standard name for this kind of thing.
 533  * Nothing else seems to be standardized: the fractional size etc
 534  * all seem to differ on different machines.
 535  */
 536 unsigned long avenrun[3] = { 0,0,0 };
 537 
 538 /*
 539  * Nr of active tasks - counted in fixed-point numbers
 540  */
 541 static unsigned long count_active_tasks(void)
 542 {
 543         struct task_struct **p;
 544         unsigned long nr = 0;
 545 
 546         for(p = &LAST_TASK; p > &FIRST_TASK; --p)
 547                 if (*p && ((*p)->state == TASK_RUNNING ||
 548                            (*p)->state == TASK_UNINTERRUPTIBLE ||
 549                            (*p)->state == TASK_SWAPPING))
 550                         nr += FIXED_1;
 551 #ifdef __SMP__
 552         nr-=(smp_num_cpus-1)*FIXED_1;
 553 #endif                  
 554         return nr;
 555 }
 556 
 557 static inline void calc_load(void)
 558 {
 559         unsigned long active_tasks; /* fixed-point */
 560         static int count = LOAD_FREQ;
 561 
 562         if (count-- > 0)
 563                 return;
 564         count = LOAD_FREQ;
 565         active_tasks = count_active_tasks();
 566         CALC_LOAD(avenrun[0], EXP_1, active_tasks);
 567         CALC_LOAD(avenrun[1], EXP_5, active_tasks);
 568         CALC_LOAD(avenrun[2], EXP_15, active_tasks);
 569 }
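
/*
 * Worked fixed-point numbers for the above (added note, not in the
 * original listing; assumes the <linux/sched.h> constants of this era:
 * FSHIFT = 11, FIXED_1 = 2048, LOAD_FREQ = 5*HZ, EXP_1 = 1884).
 * CALC_LOAD computes
 *
 *      load = (load*EXP_1 + n*(FIXED_1 - EXP_1)) >> FSHIFT
 *
 * every 5 seconds, i.e. the old average is decayed by
 * e^(-5/60) ~= 1884/2048 and the active-task count n (already scaled
 * by FIXED_1 in count_active_tasks()) supplies the rest.  One steadily
 * runnable task drives avenrun[0] towards 1*FIXED_1 = 2048, which
 * /proc/loadavg renders as "1.00".
 */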
 570 
 571 /*
 572  * this routine handles the overflow of the microsecond field
 573  *
 574  * The tricky bits of code to handle the accurate clock support
 575  * were provided by Dave Mills (Mills@UDEL.EDU) of NTP fame.
 576  * They were originally developed for SUN and DEC kernels.
 577  * All the kudos should go to Dave for this stuff.
 578  *
 579  */
 580 static void second_overflow(void)
 581 {
 582     long ltemp;
 583 
 584     /* Bump the maxerror field */
 585     time_maxerror = (0x70000000-time_maxerror <
 586                      time_tolerance >> SHIFT_USEC) ?
 587         0x70000000 : (time_maxerror + (time_tolerance >> SHIFT_USEC));
 588 
 589     /*
 590      * Leap second processing. If in leap-insert state at
 591      * the end of the day, the system clock is set back one
 592      * second; if in leap-delete state, the system clock is
 593      * set ahead one second. The microtime() routine or
  594  * external clock driver will ensure that reported time
 595      * is always monotonic. The ugly divides should be
 596      * replaced.
 597      */
 598     switch (time_state) {
 599 
 600     case TIME_OK:
 601         if (time_status & STA_INS)
 602             time_state = TIME_INS;
 603         else if (time_status & STA_DEL)
 604             time_state = TIME_DEL;
 605         break;
 606 
 607     case TIME_INS:
 608         if (xtime.tv_sec % 86400 == 0) {
 609             xtime.tv_sec--;
 610             time_state = TIME_OOP;
 611             printk("Clock: inserting leap second 23:59:60 UTC\n");
 612         }
 613         break;
 614 
 615     case TIME_DEL:
 616         if ((xtime.tv_sec + 1) % 86400 == 0) {
 617             xtime.tv_sec++;
 618             time_state = TIME_WAIT;
 619             printk("Clock: deleting leap second 23:59:59 UTC\n");
 620         }
 621         break;
 622 
 623     case TIME_OOP:
 624 
 625         time_state = TIME_WAIT;
 626         break;
 627 
 628     case TIME_WAIT:
 629         if (!(time_status & (STA_INS | STA_DEL)))
 630             time_state = TIME_OK;
 631     }
 632 
 633     /*
 634      * Compute the phase adjustment for the next second. In
 635      * PLL mode, the offset is reduced by a fixed factor
 636      * times the time constant. In FLL mode the offset is
 637      * used directly. In either mode, the maximum phase
 638      * adjustment for each second is clamped so as to spread
 639      * the adjustment over not more than the number of
 640      * seconds between updates.
 641      */
 642     if (time_offset < 0) {
 643         ltemp = -time_offset;
 644         if (!(time_status & STA_FLL))
 645             ltemp >>= SHIFT_KG + time_constant;
 646         if (ltemp > (MAXPHASE / MINSEC) << SHIFT_UPDATE)
 647             ltemp = (MAXPHASE / MINSEC) <<
 648                 SHIFT_UPDATE;
 649         time_offset += ltemp;
 650         time_adj = -ltemp << (SHIFT_SCALE - SHIFT_HZ -
 651                               SHIFT_UPDATE);
 652     } else {
 653         ltemp = time_offset;
 654         if (!(time_status & STA_FLL))
 655             ltemp >>= SHIFT_KG + time_constant;
 656         if (ltemp > (MAXPHASE / MINSEC) << SHIFT_UPDATE)
 657             ltemp = (MAXPHASE / MINSEC) <<
 658                 SHIFT_UPDATE;
 659         time_offset -= ltemp;
 660         time_adj = ltemp << (SHIFT_SCALE - SHIFT_HZ -
 661                              SHIFT_UPDATE);
 662     }
 663 
 664     /*
 665      * Compute the frequency estimate and additional phase
 666      * adjustment due to frequency error for the next
 667      * second. When the PPS signal is engaged, gnaw on the
 668      * watchdog counter and update the frequency computed by
 669      * the pll and the PPS signal.
 670      */
 671     pps_valid++;
 672     if (pps_valid == PPS_VALID) {
 673         pps_jitter = MAXTIME;
 674         pps_stabil = MAXFREQ;
 675         time_status &= ~(STA_PPSSIGNAL | STA_PPSJITTER |
 676                          STA_PPSWANDER | STA_PPSERROR);
 677     }
 678     ltemp = time_freq + pps_freq;
 679     if (ltemp < 0)
 680         time_adj -= -ltemp >>
 681             (SHIFT_USEC + SHIFT_HZ - SHIFT_SCALE);
 682     else
 683         time_adj += ltemp >>
 684             (SHIFT_USEC + SHIFT_HZ - SHIFT_SCALE);
 685 
 686 #if HZ == 100
 687     /* compensate for (HZ==100) != 128. Add 25% to get 125; => only 3% error */
 688     if (time_adj < 0)
 689         time_adj -= -time_adj >> 2;
 690     else
 691         time_adj += time_adj >> 2;
 692 #endif
 693 }
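
/*
 * Worked numbers for the PLL branch above (added note, not in the
 * original listing; assumes SHIFT_KG = 6 from <linux/timex.h>).  With
 * the default time_constant of 2, each second the remaining offset is
 * reduced by time_offset >> (6 + 2), i.e. by 1/256 of itself, so a
 * measured offset decays smoothly with a time constant of roughly 256
 * seconds instead of being stepped into the clock at once.
 */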
 694 
 695 /*
 696  * disregard lost ticks for now.. We don't care enough.
 697  */
 698 static void timer_bh(void * unused)
 699 {
 700         unsigned long mask;
 701         struct timer_struct *tp;
 702         struct timer_list * timer;
 703 
 704         cli();
 705         while ((timer = timer_head.next) != &timer_head && timer->expires <= jiffies) {
 706                 void (*fn)(unsigned long) = timer->function;
 707                 unsigned long data = timer->data;
 708                 timer->next->prev = timer->prev;
 709                 timer->prev->next = timer->next;
 710                 timer->next = timer->prev = NULL;
 711                 sti();
 712                 fn(data);
 713                 cli();
 714         }
 715         sti();
 716         
 717         for (mask = 1, tp = timer_table+0 ; mask ; tp++,mask += mask) {
 718                 if (mask > timer_active)
 719                         break;
 720                 if (!(mask & timer_active))
 721                         continue;
 722                 if (tp->expires > jiffies)
 723                         continue;
 724                 timer_active &= ~mask;
 725                 tp->fn();
 726                 sti();
 727         }
 728 }
 729 
 730 void tqueue_bh(void * unused)
 731 {
 732         run_task_queue(&tq_timer);
 733 }
 734 
 735 void immediate_bh(void * unused)
 736 {
 737         run_task_queue(&tq_immediate);
 738 }
 739 
 740 void do_timer(struct pt_regs * regs)
 741 {
 742         unsigned long mask;
 743         struct timer_struct *tp;
 744         long ltemp, psecs;
 745 
  746         /* Advance the phase; once it accumulates to one microsecond
  747          * or more, fold the whole microseconds into the tick.
  748          */
 749         time_phase += time_adj;
 750         if (time_phase <= -FINEUSEC) {
 751                 ltemp = -time_phase >> SHIFT_SCALE;
 752                 time_phase += ltemp << SHIFT_SCALE;
 753                 xtime.tv_usec += tick + time_adjust_step - ltemp;
 754         }
 755         else if (time_phase >= FINEUSEC) {
 756                 ltemp = time_phase >> SHIFT_SCALE;
 757                 time_phase -= ltemp << SHIFT_SCALE;
 758                 xtime.tv_usec += tick + time_adjust_step + ltemp;
 759         } else
 760                 xtime.tv_usec += tick + time_adjust_step;
 761 
 762         if (time_adjust) {
 763             /* We are doing an adjtime thing. 
 764              *
 765              * Modify the value of the tick for next time.
 766              * Note that a positive delta means we want the clock
 767              * to run fast. This means that the tick should be bigger
 768              *
 769              * Limit the amount of the step for *next* tick to be
 770              * in the range -tickadj .. +tickadj
 771              */
 772              if (time_adjust > tickadj)
 773                time_adjust_step = tickadj;
 774              else if (time_adjust < -tickadj)
 775                time_adjust_step = -tickadj;
 776              else
 777                time_adjust_step = time_adjust;
 778              
  779             /* Reduce the amount of time left by this step */
 780             time_adjust -= time_adjust_step;
 781         }
 782         else
 783             time_adjust_step = 0;
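
        /*
         * Worked numbers for the clamp above (added note, not in the
         * original listing), at HZ = 100: tick is 10000 us and tickadj
         * is 500/HZ = 5 us, so adjtime() can slew the clock by at most
         * 5 us per tick (500 us per second), and a full 1 second
         * adjustment takes about 2000 seconds to play out.
         */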
 784 
 785         if (xtime.tv_usec >= 1000000) {
 786             xtime.tv_usec -= 1000000;
 787             xtime.tv_sec++;
 788             second_overflow();
 789         }
 790 
 791         jiffies++;
 792         calc_load();
 793         if (user_mode(regs)) {
 794                 current->utime++;
 795                 if (current->pid) {
 796                         if (current->priority < DEF_PRIORITY)
 797                                 kstat.cpu_nice++;
 798                         else
 799                                 kstat.cpu_user++;
 800                 }
 801                 /* Update ITIMER_VIRT for current task if not in a system call */
 802                 if (current->it_virt_value && !(--current->it_virt_value)) {
 803                         current->it_virt_value = current->it_virt_incr;
 804                         send_sig(SIGVTALRM,current,1);
 805                 }
 806         } else {
 807                 current->stime++;
 808                 if(current->pid)
 809                         kstat.cpu_system++;
 810                 if (prof_buffer && current->pid) {
 811                         extern int _stext;
 812                         unsigned long ip = instruction_pointer(regs);
 813                         ip -= (unsigned long) &_stext;
 814                         ip >>= prof_shift;
 815                         if (ip < prof_len)
 816                                 prof_buffer[ip]++;
 817                 }
 818         }
 819         /*
 820          * check the cpu time limit on the process.
 821          */
 822         if ((current->rlim[RLIMIT_CPU].rlim_max != RLIM_INFINITY) &&
 823             (((current->stime + current->utime) / HZ) >= current->rlim[RLIMIT_CPU].rlim_max))
 824                 send_sig(SIGKILL, current, 1);
 825         if ((current->rlim[RLIMIT_CPU].rlim_cur != RLIM_INFINITY) &&
 826             (((current->stime + current->utime) % HZ) == 0)) {
 827                 psecs = (current->stime + current->utime) / HZ;
 828                 /* send when equal */
 829                 if (psecs == current->rlim[RLIMIT_CPU].rlim_cur)
 830                         send_sig(SIGXCPU, current, 1);
 831                 /* and every five seconds thereafter. */
 832                 else if ((psecs > current->rlim[RLIMIT_CPU].rlim_cur) &&
 833                         ((psecs - current->rlim[RLIMIT_CPU].rlim_cur) % 5) == 0)
 834                         send_sig(SIGXCPU, current, 1);
 835         }
 836 
 837         if (current->pid && 0 > --current->counter) {
 838                 current->counter = 0;
 839                 need_resched = 1;
 840         }
 841         /* Update ITIMER_PROF for the current task */
 842         if (current->it_prof_value && !(--current->it_prof_value)) {
 843                 current->it_prof_value = current->it_prof_incr;
 844                 send_sig(SIGPROF,current,1);
 845         }
 846         for (mask = 1, tp = timer_table+0 ; mask ; tp++,mask += mask) {
 847                 if (mask > timer_active)
 848                         break;
 849                 if (!(mask & timer_active))
 850                         continue;
 851                 if (tp->expires > jiffies)
 852                         continue;
 853                 mark_bh(TIMER_BH);
 854         }
 855         cli();
 856         if (timer_head.next->expires <= jiffies)
 857                 mark_bh(TIMER_BH);
 858         if (tq_timer != &tq_last)
 859                 mark_bh(TQUEUE_BH);
 860         sti();
 861 }
 862 
 863 asmlinkage unsigned int sys_alarm(unsigned int seconds)
 864 {
 865         struct itimerval it_new, it_old;
 866         unsigned int oldalarm;
 867 
 868         it_new.it_interval.tv_sec = it_new.it_interval.tv_usec = 0;
 869         it_new.it_value.tv_sec = seconds;
 870         it_new.it_value.tv_usec = 0;
 871         _setitimer(ITIMER_REAL, &it_new, &it_old);
 872         oldalarm = it_old.it_value.tv_sec;
 873         /* ehhh.. We can't return 0 if we have an alarm pending.. */
  874         /* And it's better to return too much than too little anyway */
 875         if (it_old.it_value.tv_usec)
 876                 oldalarm++;
 877         return oldalarm;
 878 }
 879 
 880 asmlinkage int sys_getpid(void)
 881 {
 882         return current->pid;
 883 }
 884 
 885 asmlinkage int sys_getppid(void)
 886 {
 887         return current->p_opptr->pid;
 888 }
 889 
 890 asmlinkage int sys_getuid(void)
 891 {
 892         return current->uid;
 893 }
 894 
 895 asmlinkage int sys_geteuid(void)
 896 {
 897         return current->euid;
 898 }
 899 
 900 asmlinkage int sys_getgid(void)
 901 {
 902         return current->gid;
 903 }
 904 
 905 asmlinkage int sys_getegid(void)
 906 {
 907         return current->egid;
 908 }
 909 
 910 asmlinkage int sys_nice(int increment)
 911 {
 912         unsigned long newprio;
 913         int increase = 0;
 914 
 915         newprio = increment;
 916         if (increment < 0) {
 917                 if (!suser())
 918                         return -EPERM;
 919                 newprio = -increment;
 920                 increase = 1;
 921         }
 922         if (newprio > 40)
 923                 newprio = 40;
  924         /*
  925          * Do a "normalization" of the priority. Traditionally
  926          * unix nice values are -20..20; linux doesn't really
  927          * use that kind of thing, but uses the length of the
  928          * timeslice instead (default 150 msec). The rounding
  929          * below is why we want to avoid negative values.
  930          */
 931         newprio = (newprio * DEF_PRIORITY + 10) / 20;
 932         increment = newprio;
 933         if (increase)
 934                 increment = -increment;
 935         newprio = current->priority - increment;
 936         if (newprio < 1)
 937                 newprio = 1;
 938         if (newprio > DEF_PRIORITY*2)
 939                 newprio = DEF_PRIORITY*2;
 940         current->priority = newprio;
 941         return 0;
 942 }
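
/*
 * The normalization above, worked through (added note, not in the
 * original listing; assumes DEF_PRIORITY is 20, i.e. HZ = 100):
 * nice(10) gives newprio = (10*20 + 10)/20 = 10, so the caller's
 * priority drops from the default 20 to 10, halving its timeslice;
 * nice(-10), superuser only, raises it to 30.  The result is always
 * clamped to 1..2*DEF_PRIORITY.
 */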
 943 
 944 static struct task_struct *find_process_by_pid(pid_t pid) {
 945         struct task_struct *p, *q;
 946 
 947         if (pid == 0)
 948                 p = current;
 949         else {
 950                 p = 0;
 951                 for_each_task(q) {
 952                         if (q && q->pid == pid) {
 953                                 p = q;
 954                                 break;
 955                         }
 956                 }
 957         }
 958         return p;
 959 }
 960 
 961 static int setscheduler(pid_t pid, int policy, 
 962                         struct sched_param *param)
 963 {
 964         int error;
 965         struct sched_param lp;
 966         struct task_struct *p;
 967 
 968         if (!param || pid < 0)
 969                 return -EINVAL;
 970 
 971         error = verify_area(VERIFY_READ, param, sizeof(struct sched_param));
 972         if (error)
 973                 return -EINVAL;
 974         memcpy_fromfs(&lp, param, sizeof(struct sched_param));
 975 
 976         p = find_process_by_pid(pid);
 977         if (!p)
 978                 return -ESRCH;
 979                         
 980         if (policy < 0)
 981                 policy = p->policy;
 982         else if (policy != SCHED_FIFO && policy != SCHED_RR &&
 983                  policy != SCHED_OTHER)
 984                 return -EINVAL;
 985         
 986         /*
 987          * Valid priorities for SCHED_FIFO and SCHED_RR are 1..99, valid
 988          * priority for SCHED_OTHER is 0.
 989          */
 990         if (lp.sched_priority < 0 || lp.sched_priority > 99)
 991                 return -EINVAL;
 992         if ((policy == SCHED_OTHER) != (lp.sched_priority == 0))
 993                 return -EINVAL;
 994 
 995         if ((policy == SCHED_FIFO || policy == SCHED_RR) && !suser())
 996                 return -EPERM;
 997         if ((current->euid != p->euid) && (current->euid != p->uid) &&
 998             !suser())
 999                 return -EPERM;
1000 
1001         p->policy = policy;
1002         p->rt_priority = lp.sched_priority;
1003         schedule();
1004 
1005         return 0;
1006 }
1007 
1008 asmlinkage int sys_sched_setscheduler(pid_t pid, int policy, 
1009                                       struct sched_param *param)
1010 {
1011         return setscheduler(pid, policy, param);
1012 }
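
/*
 * What this pair looks like from user space, via the POSIX.1b wrapper
 * in <sched.h> (added sketch, not in the original listing):
 */
#if 0
#include <sched.h>
#include <stdio.h>

int make_me_fifo(void)
{
        struct sched_param sp;

        sp.sched_priority = 50;         /* 1..99 for SCHED_FIFO/SCHED_RR */
        if (sched_setscheduler(0, SCHED_FIFO, &sp) < 0) {
                perror("sched_setscheduler");   /* EPERM unless superuser */
                return -1;
        }
        return 0;
}
#endif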
1013 
1014 asmlinkage int sys_sched_setparam(pid_t pid, struct sched_param *param)
1015 {
1016         return setscheduler(pid, -1, param);
1017 }
1018 
1019 asmlinkage int sys_sched_getscheduler(pid_t pid)
1020 {
1021         struct task_struct *p;
1022 
1023         if (pid < 0)
1024                 return -EINVAL;
1025 
1026         p = find_process_by_pid(pid);
1027         if (!p)
1028                 return -ESRCH;
1029                         
1030         return p->policy;
1031 }
1032 
1033 asmlinkage int sys_sched_getparam(pid_t pid, struct sched_param *param)
1034 {
1035         int error;
1036         struct task_struct *p;
1037         struct sched_param lp;
1038 
1039         if (!param || pid < 0)
1040                 return -EINVAL;
1041 
1042         error = verify_area(VERIFY_WRITE, param, sizeof(struct sched_param));
1043         if (error)
1044                 return -EINVAL;
1045 
1046         p = find_process_by_pid(pid);
1047         if (!p)
1048                 return -ESRCH;
1049 
1050         lp.sched_priority = p->rt_priority;
1051         memcpy_tofs(param, &lp, sizeof(struct sched_param));
1052 
1053         return 0;
1054 }
1055 
1056 asmlinkage int sys_sched_yield(void)
1057 {
1058         /* ... not yet implemented ... */
1059         return -ENOSYS;
1060 }
1061 
1062 asmlinkage int sys_sched_get_priority_max(int policy)
1063 {
1064         switch (policy) {
1065               case SCHED_FIFO:
1066               case SCHED_RR:
1067                 return 99;
1068               case SCHED_OTHER:
1069                 return 0;
1070         }
1071 
1072         return -EINVAL;
1073 }
1074 
1075 asmlinkage int sys_sched_get_priority_min(int policy)
1076 {
1077         switch (policy) {
1078               case SCHED_FIFO:
1079               case SCHED_RR:
1080                 return 1;
1081               case SCHED_OTHER:
1082                 return 0;
1083         }
1084 
1085         return -EINVAL;
1086 }
1087 
1088 asmlinkage int sys_sched_rr_get_interval(pid_t pid, struct timespec *interval)
1089 {
1090         int error;
1091         struct timespec t;
1092 
1093         error = verify_area(VERIFY_WRITE, interval, sizeof(struct timespec));
1094         if (error)
1095                 return -EINVAL;
1096         
1097         t.tv_sec = 0;
1098         t.tv_nsec = 0;   /* <-- Linus, please fill correct value in here */
1099         return -ENOSYS;  /* and then delete this line. Thanks!           */
1100         memcpy_tofs(interval, &t, sizeof(struct timespec));
1101 
1102         return 0;
1103 }
1104 
1105 static void show_task(int nr,struct task_struct * p)
1106 {
1107         unsigned long free;
1108         static const char * stat_nam[] = { "R", "S", "D", "Z", "T", "W" };
1109 
1110         printk("%-8s %3d ", p->comm, (p == current) ? -nr : nr);
1111         if (((unsigned) p->state) < sizeof(stat_nam)/sizeof(char *))
1112                 printk(stat_nam[p->state]);
1113         else
1114                 printk(" ");
1115 #if ((~0UL) == 0xffffffff)
1116         if (p == current)
1117                 printk(" current  ");
1118         else
1119                 printk(" %08lX ", thread_saved_pc(&p->tss));
1120 #else
1121         if (p == current)
1122                 printk("   current task   ");
1123         else
1124                 printk(" %016lx ", thread_saved_pc(&p->tss));
1125 #endif
1126         for (free = 1; free < PAGE_SIZE/sizeof(long) ; free++) {
1127                 if (((unsigned long *)p->kernel_stack_page)[free])
1128                         break;
1129         }
1130         printk("%5lu %5d %6d ", free*sizeof(long), p->pid, p->p_pptr->pid);
1131         if (p->p_cptr)
1132                 printk("%5d ", p->p_cptr->pid);
1133         else
1134                 printk("      ");
1135         if (p->p_ysptr)
1136                 printk("%7d", p->p_ysptr->pid);
1137         else
1138                 printk("       ");
1139         if (p->p_osptr)
1140                 printk(" %5d\n", p->p_osptr->pid);
1141         else
1142                 printk("\n");
1143 }
1144 
1145 void show_state(void)
1146 {
1147         int i;
1148 
1149 #if ((~0UL) == 0xffffffff)
1150         printk("\n"
1151                "                         free                        sibling\n");
1152         printk("  task             PC    stack   pid father child younger older\n");
1153 #else
1154         printk("\n"
1155                "                                 free                        sibling\n");
1156         printk("  task                 PC        stack   pid father child younger older\n");
1157 #endif
1158         for (i=0 ; i<NR_TASKS ; i++)
1159                 if (task[i])
1160                         show_task(i,task[i]);
1161 }
1162 
1163 void sched_init(void)
1164 {
1165         /*
1166          *      We have to do a little magic to get the first
1167          *      process right in SMP mode.
1168          */
1169         int cpu=smp_processor_id();
1170         current_set[cpu]=&init_task;
1171 #ifdef __SMP__  
1172         init_task.processor=cpu;
1173 #endif
1174         bh_base[TIMER_BH].routine = timer_bh;
1175         bh_base[TQUEUE_BH].routine = tqueue_bh;
1176         bh_base[IMMEDIATE_BH].routine = immediate_bh;
1177         enable_bh(TIMER_BH);
1178         enable_bh(TQUEUE_BH);
1179         enable_bh(IMMEDIATE_BH);
1180 }
