root/kernel/sched.c


DEFINITIONS

This source file includes the following definitions:
  1. add_to_runqueue
  2. del_from_runqueue
  3. move_last_runqueue
  4. wake_up_process
  5. process_timeout
  6. goodness
  7. schedule
  8. sys_pause
  9. wake_up
  10. wake_up_interruptible
  11. __down
  12. __sleep_on
  13. interruptible_sleep_on
  14. sleep_on
  15. add_timer
  16. del_timer
  17. count_active_tasks
  18. calc_load
  19. second_overflow
  20. timer_bh
  21. tqueue_bh
  22. immediate_bh
  23. do_timer
  24. sys_alarm
  25. sys_getpid
  26. sys_getppid
  27. sys_getuid
  28. sys_geteuid
  29. sys_getgid
  30. sys_getegid
  31. sys_nice
  32. show_task
  33. show_state
  34. sched_init

/*
 *  linux/kernel/sched.c
 *
 *  Copyright (C) 1991, 1992  Linus Torvalds
 */

/*
 * 'sched.c' is the main kernel file. It contains scheduling primitives
 * (sleep_on, wakeup, schedule etc) as well as a number of simple system
 * call functions (like getpid(), which just extracts a field from
 * the current task).
 */

#include <linux/signal.h>
#include <linux/sched.h>
#include <linux/timer.h>
#include <linux/kernel.h>
#include <linux/kernel_stat.h>
#include <linux/fdreg.h>
#include <linux/errno.h>
#include <linux/time.h>
#include <linux/ptrace.h>
#include <linux/delay.h>
#include <linux/interrupt.h>
#include <linux/tqueue.h>
#include <linux/resource.h>
#include <linux/mm.h>
#include <linux/smp.h>

#include <asm/system.h>
#include <asm/io.h>
#include <asm/segment.h>
#include <asm/pgtable.h>

#include <linux/timex.h>

/*
 * kernel variables
 */
long tick = 1000000 / HZ;               /* timer interrupt period */
volatile struct timeval xtime;          /* The current time */
int tickadj = 500/HZ;                   /* microsecs */

DECLARE_TASK_QUEUE(tq_timer);
DECLARE_TASK_QUEUE(tq_immediate);
DECLARE_TASK_QUEUE(tq_scheduler);

/*
 * phase-lock loop variables
 */
int time_state = TIME_BAD;      /* clock synchronization status */
int time_status = STA_UNSYNC;   /* clock status bits */
long time_offset = 0;           /* time adjustment (us) */
long time_constant = 0;         /* pll time constant */
long time_tolerance = MAXFREQ;  /* frequency tolerance (ppm) */
long time_precision = 1;        /* clock precision (us) */
long time_maxerror = 0x70000000;/* maximum error */
long time_esterror = 0x70000000;/* estimated error */
long time_phase = 0;            /* phase offset (scaled us) */
long time_freq = 0;             /* frequency offset (scaled ppm) */
long time_adj = 0;              /* tick adjust (scaled 1 / HZ) */
long time_reftime = 0;          /* time at last adjustment (s) */

long time_adjust = 0;
long time_adjust_step = 0;

int need_resched = 0;
unsigned long event = 0;

extern int _setitimer(int, struct itimerval *, struct itimerval *);
unsigned long * prof_buffer = NULL;
unsigned long prof_len = 0;
unsigned long prof_shift = 0;

#define _S(nr) (1<<((nr)-1))

extern void mem_use(void);

static unsigned long init_kernel_stack[1024] = { STACK_MAGIC, };
unsigned long init_user_stack[1024] = { STACK_MAGIC, };
static struct vm_area_struct init_mmap = INIT_MMAP;
static struct fs_struct init_fs = INIT_FS;
static struct files_struct init_files = INIT_FILES;
static struct signal_struct init_signals = INIT_SIGNALS;

struct mm_struct init_mm = INIT_MM;
struct task_struct init_task = INIT_TASK;

unsigned long volatile jiffies = 0;

struct task_struct *current_set[NR_CPUS];
struct task_struct *last_task_used_math = NULL;

struct task_struct * task[NR_TASKS] = {&init_task, };

struct kernel_stat kstat = { 0 };
static inline void add_to_runqueue(struct task_struct * p)
{
#if 1   /* sanity tests */
        if (p->next_run || p->prev_run) {
                printk("task already on run-queue\n");
                return;
        }
#endif
        if (p->counter > current->counter + 3)
                need_resched = 1;
        nr_running++;
        (p->prev_run = init_task.prev_run)->next_run = p;
        p->next_run = &init_task;
        init_task.prev_run = p;
}

static inline void del_from_runqueue(struct task_struct * p)
{
        struct task_struct *next = p->next_run;
        struct task_struct *prev = p->prev_run;

#if 1   /* sanity tests */
        if (!next || !prev) {
                printk("task not on run-queue\n");
                return;
        }
#endif
        if (p == &init_task) {
                static int nr = 0;
                if (nr < 5) {
                        nr++;
                        printk("idle task may not sleep\n");
                }
                return;
        }
        nr_running--;
        next->prev_run = prev;
        prev->next_run = next;
        p->next_run = NULL;
        p->prev_run = NULL;
}

static inline void move_last_runqueue(struct task_struct * p)
{
        struct task_struct *next = p->next_run;
        struct task_struct *prev = p->prev_run;

        next->prev_run = prev;
        prev->next_run = next;
        (p->prev_run = init_task.prev_run)->next_run = p;
        p->next_run = &init_task;
        init_task.prev_run = p;
}

/*
 * Wake up a process. Put it on the run-queue if it's not
 * already there.  The "current" process is always on the
 * run-queue (except when the actual re-schedule is in
 * progress), and as such you're allowed to do the simpler
 * "current->state = TASK_RUNNING" to mark yourself runnable
 * without the overhead of this.
 */
inline void wake_up_process(struct task_struct * p)
{
        unsigned long flags;

        save_flags(flags);
        cli();
        p->state = TASK_RUNNING;
        if (!p->next_run)
                add_to_runqueue(p);
        restore_flags(flags);
}

static void process_timeout(unsigned long __data)
{
        struct task_struct * p = (struct task_struct *) __data;

        p->timeout = 0;
        wake_up_process(p);
}

/*
 * This is the function that decides how desirable a process is..
 * You can weigh different processes against each other depending
 * on what CPU they've run on lately etc to try to handle cache
 * and TLB miss penalties.
 *
 * Return values:
 *       -1000: never select this
 *           0: out of time, recalculate counters (but it might still be
 *              selected)
 *         +ve: "goodness" value (the larger, the better)
 *       +1000: realtime process, select this.
 */
static inline int goodness(struct task_struct * p, int this_cpu)
{
        int weight;

#ifdef __SMP__
        /* We are not permitted to run a task someone else is running */
        if (p->processor != NO_PROC_ID)
                return -1000;
#endif

        /*
         * Realtime process, select the first one on the
         * runqueue (taking priorities within processes
         * into account).
         */
        if (p->policy != SCHED_OTHER)
                return 1000 + p->priority;

        /*
         * Give the process a first-approximation goodness value
         * according to the number of clock-ticks it has left.
         *
         * Don't do any other calculations if the time slice is
         * over..
         */
        weight = p->counter;
        if (weight) {

#ifdef __SMP__
                /* Give a largish advantage to the same processor...   */
                /* (this is equivalent to penalizing other processors) */
                if (p->last_processor == this_cpu)
                        weight += PROC_CHANGE_PENALTY;
#endif

                /* .. and a slight advantage to the current process */
                if (p == current)
                        weight += 1;
        }

        return weight;
}
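
/*
 * Illustration (not part of the kernel source): the weighting above can
 * be exercised in user space. The mock structure, the SCHED_* values and
 * the PROC_CHANGE_PENALTY figure below are stand-ins for the demo; the
 * SMP "already running elsewhere" -1000 case is omitted.
 */
#include <stdio.h>

#define SCHED_OTHER 0
#define SCHED_FIFO  1
#define PROC_CHANGE_PENALTY 20          /* assumed value, for the demo only */

struct mock_task {
        int policy;                     /* SCHED_OTHER or a realtime policy */
        int priority;                   /* static priority */
        int counter;                    /* ticks left in this timeslice */
        int last_processor;
};

static int mock_goodness(struct mock_task *p, int this_cpu,
                         struct mock_task *curr)
{
        int weight;

        if (p->policy != SCHED_OTHER)
                return 1000 + p->priority;      /* realtime always wins */
        weight = p->counter;                    /* 0: timeslice used up */
        if (weight) {
                if (p->last_processor == this_cpu)
                        weight += PROC_CHANGE_PENALTY; /* cache affinity bonus */
                if (p == curr)
                        weight += 1;            /* slight bias to keep running */
        }
        return weight;
}

int main(void)
{
        struct mock_task rt  = { SCHED_FIFO,  50, 10, 0 };
        struct mock_task ts  = { SCHED_OTHER, 20, 15, 0 };
        struct mock_task out = { SCHED_OTHER, 20,  0, 1 };

        printf("realtime=%d timesharing=%d out-of-time=%d\n",
               mock_goodness(&rt, 0, &ts),      /* 1000 + 50 = 1050 */
               mock_goodness(&ts, 0, &ts),      /* 15 + 20 + 1 = 36 */
               mock_goodness(&out, 0, &ts));    /* 0: counters need refresh */
        return 0;
}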

/*
 *  'schedule()' is the scheduler function. It's a very simple and nice
 * scheduler: it's not perfect, but certainly works for most things.
 *
 * The goto is "interesting".
 *
 *   NOTE!!  Task 0 is the 'idle' task, which gets called when no other
 * tasks can run. It cannot be killed, and it cannot sleep. The 'state'
 * information in task[0] is never used.
 */
asmlinkage void schedule(void)
{
        int c;
        struct task_struct * p;
        struct task_struct * next;
        unsigned long timeout = 0;
        int this_cpu = smp_processor_id();

/* check alarm, wake up any interruptible tasks that have got a signal */

        if (intr_count) {
                printk("Aiee: scheduling in interrupt\n");
                return;
        }
        run_task_queue(&tq_scheduler);

        need_resched = 0;
        cli();
        /* move an exhausted RR process to be last.. */
        if (!current->counter && current->policy == SCHED_RR) {
                current->counter = current->priority;
                move_last_runqueue(current);
        }
        switch (current->state) {
                case TASK_INTERRUPTIBLE:
                        if (current->signal & ~current->blocked)
                                goto makerunnable;
                        timeout = current->timeout;
                        if (timeout && (timeout <= jiffies)) {
                                current->timeout = 0;
                                timeout = 0;
                makerunnable:
                                current->state = TASK_RUNNING;
                                break;
                        }
                default:
                        del_from_runqueue(current);
                case TASK_RUNNING:
        }
        p = init_task.next_run;
        sti();

#ifdef __SMP__
        /*
         *      This is safe as we do not permit re-entry of schedule()
         */
        current->processor = NO_PROC_ID;
#endif

/*
 * Note! there may appear new tasks on the run-queue during this, as
 * interrupts are enabled. However, they will be put at the front of the
 * list, so our list starting at "p" is essentially fixed.
 */
/* this is the scheduler proper: */
        c = -1000;
        next = &init_task;
        while (p != &init_task) {
                int weight = goodness(p, this_cpu);
                if (weight > c)
                        c = weight, next = p;
                p = p->next_run;
        }

        /* if all runnable processes have "counter == 0", re-calculate counters */
        if (!c) {
                for_each_task(p)
                        p->counter = (p->counter >> 1) + p->priority;
        }
#ifdef __SMP__
        /*
         *      Context switching between two idle threads is pointless.
         */
        if (!current->pid && !next->pid)
                next = current;
        /*
         *      Allocate process to CPU
         */
        next->processor = this_cpu;
        next->last_processor = this_cpu;
#endif
        if (current != next) {
                struct timer_list timer;

                kstat.context_swtch++;
                if (timeout) {
                        init_timer(&timer);
                        timer.expires = timeout;
                        timer.data = (unsigned long) current;
                        timer.function = process_timeout;
                        add_timer(&timer);
                }
                switch_to(next);
                if (timeout)
                        del_timer(&timer);
        }
}

asmlinkage int sys_pause(void)
{
        current->state = TASK_INTERRUPTIBLE;
        schedule();
        return -ERESTARTNOHAND;
}

/*
 * wake_up doesn't wake up stopped processes - they have to be awakened
 * with signals or similar.
 *
 * Note that this doesn't need cli-sti pairs: interrupts may not change
 * the wait-queue structures directly, but only call wake_up() to wake
 * a process. The process itself must remove its entry from the queue
 * once it has woken.
 */
void wake_up(struct wait_queue **q)
{
        struct wait_queue *tmp;
        struct task_struct * p;

        if (!q || !(tmp = *q))
                return;
        do {
                if ((p = tmp->task) != NULL) {
                        if ((p->state == TASK_UNINTERRUPTIBLE) ||
                            (p->state == TASK_INTERRUPTIBLE))
                                wake_up_process(p);
                }
                if (!tmp->next) {
                        printk("wait_queue is bad (eip = %p)\n",
                                __builtin_return_address(0));
                        printk("        q = %p\n",q);
                        printk("       *q = %p\n",*q);
                        printk("      tmp = %p\n",tmp);
                        break;
                }
                tmp = tmp->next;
        } while (tmp != *q);
}

void wake_up_interruptible(struct wait_queue **q)
{
        struct wait_queue *tmp;
        struct task_struct * p;

        if (!q || !(tmp = *q))
                return;
        do {
                if ((p = tmp->task) != NULL) {
                        if (p->state == TASK_INTERRUPTIBLE)
                                wake_up_process(p);
                }
                if (!tmp->next) {
                        printk("wait_queue is bad (eip = %p)\n",
                                __builtin_return_address(0));
                        printk("        q = %p\n",q);
                        printk("       *q = %p\n",*q);
                        printk("      tmp = %p\n",tmp);
                        break;
                }
                tmp = tmp->next;
        } while (tmp != *q);
}

void __down(struct semaphore * sem)
{
        struct wait_queue wait = { current, NULL };
        add_wait_queue(&sem->wait, &wait);
        current->state = TASK_UNINTERRUPTIBLE;
        while (sem->count <= 0) {
                schedule();
                current->state = TASK_UNINTERRUPTIBLE;
        }
        current->state = TASK_RUNNING;
        remove_wait_queue(&sem->wait, &wait);
}
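
/*
 * Illustration (not part of the kernel source): __down() is only the
 * slow path. Callers normally use the down()/up() wrappers, which drop
 * into __down() once the count is exhausted. A minimal sketch of the
 * usual usage, assuming the MUTEX initializer and the down()/up() pair
 * provided by <asm/semaphore.h> in kernels of this vintage:
 */
#include <asm/semaphore.h>

static struct semaphore demo_lock = MUTEX;      /* count starts at 1 */

static void demo_critical_section(void)
{
        down(&demo_lock);       /* may sleep in __down() until count > 0 */
        /* ... touch data shared with other sleeping contexts ... */
        up(&demo_lock);         /* wakes a waiter queued on demo_lock.wait */
}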

static inline void __sleep_on(struct wait_queue **p, int state)
{
        unsigned long flags;
        struct wait_queue wait = { current, NULL };

        if (!p)
                return;
        if (current == task[0])
                panic("task[0] trying to sleep");
        current->state = state;
        add_wait_queue(p, &wait);
        save_flags(flags);
        sti();
        schedule();
        remove_wait_queue(p, &wait);
        restore_flags(flags);
}

void interruptible_sleep_on(struct wait_queue **p)
{
        __sleep_on(p,TASK_INTERRUPTIBLE);
}

void sleep_on(struct wait_queue **p)
{
        __sleep_on(p,TASK_UNINTERRUPTIBLE);
}
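
/*
 * Illustration (not part of the kernel source): sleep_on()/wake_up()
 * implement the condition-wait protocol described above wake_up(). The
 * queue head, flag and both functions below are made-up names. Any
 * wake_up() on the queue ends the sleep, so the condition must be
 * re-tested in a loop; a real driver would also check for pending
 * signals when using the interruptible variant.
 */
static struct wait_queue *demo_wait = NULL;     /* queue heads start out NULL */
static volatile int demo_data_ready = 0;        /* the awaited condition */

static void demo_wait_for_data(void)
{
        while (!demo_data_ready)
                interruptible_sleep_on(&demo_wait);
        demo_data_ready = 0;
}

static void demo_got_data(void)                 /* e.g. from an interrupt */
{
        demo_data_ready = 1;                    /* change the condition first */
        wake_up_interruptible(&demo_wait);      /* then wake the sleepers */
}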

/*
 * The head for the timer-list has an "expires" field of MAX_UINT,
 * and the sorting routine counts on this..
 */
static struct timer_list timer_head = { &timer_head, &timer_head, ~0, 0, NULL };
#define SLOW_BUT_DEBUGGING_TIMERS 1

void add_timer(struct timer_list * timer)
{
        unsigned long flags;
        struct timer_list *p;

#if SLOW_BUT_DEBUGGING_TIMERS
        if (timer->next || timer->prev) {
                printk("add_timer() called with non-zero list from %p\n",
                        __builtin_return_address(0));
                return;
        }
#endif
        p = &timer_head;
        save_flags(flags);
        cli();
        do {
                p = p->next;
        } while (timer->expires > p->expires);
        timer->next = p;
        timer->prev = p->prev;
        p->prev = timer;
        timer->prev->next = timer;
        restore_flags(flags);
}

int del_timer(struct timer_list * timer)
{
        unsigned long flags;
#if SLOW_BUT_DEBUGGING_TIMERS
        struct timer_list * p;

        p = &timer_head;
        save_flags(flags);
        cli();
        while ((p = p->next) != &timer_head) {
                if (p == timer) {
                        timer->next->prev = timer->prev;
                        timer->prev->next = timer->next;
                        timer->next = timer->prev = NULL;
                        restore_flags(flags);
                        return 1;
                }
        }
        if (timer->next || timer->prev)
                printk("del_timer() called from %p with timer not initialized\n",
                        __builtin_return_address(0));
        restore_flags(flags);
        return 0;
#else
        save_flags(flags);
        cli();
        if (timer->next) {
                timer->next->prev = timer->prev;
                timer->prev->next = timer->next;
                timer->next = timer->prev = NULL;
                restore_flags(flags);
                return 1;
        }
        restore_flags(flags);
        return 0;
#endif
}
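
/*
 * Illustration (not part of the kernel source): the sentinel noted above
 * timer_head is what lets add_timer()'s insertion loop omit an
 * end-of-list check - "timer->expires > p->expires" must fail once p
 * reaches the head, whose expires is the maximum value. The same idiom,
 * reduced to a standalone user-space demo with mock types:
 */
#include <stdio.h>

struct demo_timer {
        struct demo_timer *next, *prev;
        unsigned long expires;
};

/* circular list whose head doubles as a sentinel with the maximum key */
static struct demo_timer demo_head = { &demo_head, &demo_head, ~0UL };

static void demo_add(struct demo_timer *t)
{
        struct demo_timer *p = &demo_head;

        do {
                p = p->next;
        } while (t->expires > p->expires);      /* always stops at the head */
        t->next = p;
        t->prev = p->prev;
        p->prev = t;
        t->prev->next = t;
}

int main(void)
{
        struct demo_timer a = { NULL, NULL, 30 };
        struct demo_timer b = { NULL, NULL, 10 };
        struct demo_timer c = { NULL, NULL, 20 };
        struct demo_timer *p;

        demo_add(&a);
        demo_add(&b);
        demo_add(&c);
        for (p = demo_head.next; p != &demo_head; p = p->next)
                printf("%lu\n", p->expires);    /* prints 10, 20, 30 */
        return 0;
}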

unsigned long timer_active = 0;
struct timer_struct timer_table[32];

/*
 * Hmm.. Changed this, as the GNU make sources (load.c) seem to
 * imply that avenrun[] is the standard name for this kind of thing.
 * Nothing else seems to be standardized: the fractional size etc
 * all seem to differ on different machines.
 */
unsigned long avenrun[3] = { 0,0,0 };

/*
 * Nr of active tasks - counted in fixed-point numbers
 */
static unsigned long count_active_tasks(void)
{
        struct task_struct **p;
        unsigned long nr = 0;

        for (p = &LAST_TASK; p > &FIRST_TASK; --p)
                if (*p && ((*p)->state == TASK_RUNNING ||
                           (*p)->state == TASK_UNINTERRUPTIBLE ||
                           (*p)->state == TASK_SWAPPING))
                        nr += FIXED_1;
#ifdef __SMP__
        nr -= (smp_num_cpus-1)*FIXED_1;
#endif
        return nr;
}

static inline void calc_load(void)
{
        unsigned long active_tasks; /* fixed-point */
        static int count = LOAD_FREQ;

        if (count-- > 0)
                return;
        count = LOAD_FREQ;
        active_tasks = count_active_tasks();
        CALC_LOAD(avenrun[0], EXP_1, active_tasks);
        CALC_LOAD(avenrun[1], EXP_5, active_tasks);
        CALC_LOAD(avenrun[2], EXP_15, active_tasks);
}
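
/*
 * Illustration (not part of the kernel source): CALC_LOAD keeps an
 * exponentially-decaying average in fixed point - each update is a
 * weighted average of the old value and the current task count (already
 * multiplied by FIXED_1 in count_active_tasks()), with weight
 * exp(-5s/60s) on the old value for the 1-minute average. The constants
 * below are the traditional ones (FSHIFT = 11, EXP_1 = 1884); check
 * <linux/sched.h> for the authoritative values. Note the macro expands
 * to three statements, as in the kernel, so it is braced here.
 */
#include <stdio.h>

#define FSHIFT  11                      /* bits of fractional precision */
#define FIXED_1 (1 << FSHIFT)           /* 1.0 in fixed point */
#define EXP_1   1884                    /* FIXED_1 * exp(-5/60) */

#define CALC_LOAD(load, exp, n) \
        load *= exp; \
        load += (n) * (FIXED_1 - exp); \
        load >>= FSHIFT;

int main(void)
{
        unsigned long avenrun0 = 0;
        unsigned long active = 3 * FIXED_1;     /* three runnable tasks */
        int i;

        for (i = 0; i < 60; i++) {              /* 60 updates = 5 minutes */
                CALC_LOAD(avenrun0, EXP_1, active);
        }
        /* decode the fixed-point value the way /proc/loadavg does */
        printf("1-min load: %lu.%02lu\n",       /* approaches 3.00 */
               avenrun0 >> FSHIFT,
               ((avenrun0 & (FIXED_1 - 1)) * 100) >> FSHIFT);
        return 0;
}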

/*
 * this routine handles the overflow of the microsecond field
 *
 * The tricky bits of code to handle the accurate clock support
 * were provided by Dave Mills (Mills@UDEL.EDU) of NTP fame.
 * They were originally developed for SUN and DEC kernels.
 * All the kudos should go to Dave for this stuff.
 */
static void second_overflow(void)
{
    long ltemp;

    /* Bump the maxerror field */
    time_maxerror = (0x70000000-time_maxerror <
                     time_tolerance >> SHIFT_USEC) ?
        0x70000000 : (time_maxerror + (time_tolerance >> SHIFT_USEC));

    /*
     * Leap second processing. If in leap-insert state at
     * the end of the day, the system clock is set back one
     * second; if in leap-delete state, the system clock is
     * set ahead one second. The microtime() routine or
     * external clock driver will ensure that reported time
     * is always monotonic. The ugly divides should be
     * replaced.
     */
    switch (time_state) {

    case TIME_OK:
        if (time_status & STA_INS)
            time_state = TIME_INS;
        else if (time_status & STA_DEL)
            time_state = TIME_DEL;
        break;

    case TIME_INS:
        if (xtime.tv_sec % 86400 == 0) {
            xtime.tv_sec--;
            time_state = TIME_OOP;
            printk("Clock: inserting leap second 23:59:60 UTC\n");
        }
        break;

    case TIME_DEL:
        if ((xtime.tv_sec + 1) % 86400 == 0) {
            xtime.tv_sec++;
            time_state = TIME_WAIT;
            printk("Clock: deleting leap second 23:59:59 UTC\n");
        }
        break;

    case TIME_OOP:
        time_state = TIME_WAIT;
        break;

    case TIME_WAIT:
        if (!(time_status & (STA_INS | STA_DEL)))
            time_state = TIME_OK;
    }

    /*
     * Compute the phase adjustment for the next second. In
     * PLL mode, the offset is reduced by a fixed factor
     * times the time constant. In FLL mode the offset is
     * used directly. In either mode, the maximum phase
     * adjustment for each second is clamped so as to spread
     * the adjustment over not more than the number of
     * seconds between updates.
     */
    if (time_offset < 0) {
        ltemp = -time_offset;
        if (!(time_status & STA_FLL))
            ltemp >>= SHIFT_KG + time_constant;
        if (ltemp > (MAXPHASE / MINSEC) << SHIFT_UPDATE)
            ltemp = (MAXPHASE / MINSEC) << SHIFT_UPDATE;
        time_offset += ltemp;
        time_adj = -ltemp << (SHIFT_SCALE - SHIFT_HZ -
                              SHIFT_UPDATE);
    } else {
        ltemp = time_offset;
        if (!(time_status & STA_FLL))
            ltemp >>= SHIFT_KG + time_constant;
        if (ltemp > (MAXPHASE / MINSEC) << SHIFT_UPDATE)
            ltemp = (MAXPHASE / MINSEC) << SHIFT_UPDATE;
        time_offset -= ltemp;
        time_adj = ltemp << (SHIFT_SCALE - SHIFT_HZ -
                             SHIFT_UPDATE);
    }

    /*
     * Compute the frequency estimate and additional phase
     * adjustment due to frequency error for the next
     * second. When the PPS signal is engaged, gnaw on the
     * watchdog counter and update the frequency computed by
     * the pll and the PPS signal.
     */
    pps_valid++;
    if (pps_valid == PPS_VALID) {
        pps_jitter = MAXTIME;
        pps_stabil = MAXFREQ;
        time_status &= ~(STA_PPSSIGNAL | STA_PPSJITTER |
                         STA_PPSWANDER | STA_PPSERROR);
    }
    ltemp = time_freq + pps_freq;
    if (ltemp < 0)
        time_adj -= -ltemp >>
            (SHIFT_USEC + SHIFT_HZ - SHIFT_SCALE);
    else
        time_adj += ltemp >>
            (SHIFT_USEC + SHIFT_HZ - SHIFT_SCALE);

#if HZ == 100
    /* compensate for (HZ==100) != 128. Add 25% to get 125; => only 3% error */
    if (time_adj < 0)
        time_adj -= -time_adj >> 2;
    else
        time_adj += time_adj >> 2;
#endif
}

/*
 * Disregard lost ticks for now.. we don't care enough.
 */
static void timer_bh(void * unused)
{
        unsigned long mask;
        struct timer_struct *tp;
        struct timer_list * timer;

        cli();
        while ((timer = timer_head.next) != &timer_head && timer->expires <= jiffies) {
                void (*fn)(unsigned long) = timer->function;
                unsigned long data = timer->data;
                timer->next->prev = timer->prev;
                timer->prev->next = timer->next;
                timer->next = timer->prev = NULL;
                sti();
                fn(data);
                cli();
        }
        sti();

        for (mask = 1, tp = timer_table+0 ; mask ; tp++,mask += mask) {
                if (mask > timer_active)
                        break;
                if (!(mask & timer_active))
                        continue;
                if (tp->expires > jiffies)
                        continue;
                timer_active &= ~mask;
                tp->fn();
                sti();
        }
}

void tqueue_bh(void * unused)
{
        run_task_queue(&tq_timer);
}

void immediate_bh(void * unused)
{
        run_task_queue(&tq_immediate);
}

void do_timer(struct pt_regs * regs)
{
        unsigned long mask;
        struct timer_struct *tp;
        long ltemp, psecs;

        /* Advance the phase; once it amounts to a whole microsecond,
         * fold those microseconds into this tick as well.
         */
        time_phase += time_adj;
        if (time_phase <= -FINEUSEC) {
                ltemp = -time_phase >> SHIFT_SCALE;
                time_phase += ltemp << SHIFT_SCALE;
                xtime.tv_usec += tick + time_adjust_step - ltemp;
        }
        else if (time_phase >= FINEUSEC) {
                ltemp = time_phase >> SHIFT_SCALE;
                time_phase -= ltemp << SHIFT_SCALE;
                xtime.tv_usec += tick + time_adjust_step + ltemp;
        } else
                xtime.tv_usec += tick + time_adjust_step;

        if (time_adjust) {
            /* We are doing an adjtime thing.
             *
             * Modify the value of the tick for next time.
             * Note that a positive delta means we want the clock
             * to run fast. This means that the tick should be bigger.
             *
             * Limit the amount of the step for *next* tick to be
             * in the range -tickadj .. +tickadj
             */
             if (time_adjust > tickadj)
                time_adjust_step = tickadj;
             else if (time_adjust < -tickadj)
                time_adjust_step = -tickadj;
             else
                time_adjust_step = time_adjust;

            /* Reduce the amount of time left by this step */
            time_adjust -= time_adjust_step;
        }
        else
            time_adjust_step = 0;

        if (xtime.tv_usec >= 1000000) {
            xtime.tv_usec -= 1000000;
            xtime.tv_sec++;
            second_overflow();
        }

        jiffies++;
        calc_load();
        if (user_mode(regs)) {
                current->utime++;
                if (current->pid) {
                        if (current->priority < DEF_PRIORITY)
                                kstat.cpu_nice++;
                        else
                                kstat.cpu_user++;
                }
                /* Update ITIMER_VIRT for current task if not in a system call */
                if (current->it_virt_value && !(--current->it_virt_value)) {
                        current->it_virt_value = current->it_virt_incr;
                        send_sig(SIGVTALRM,current,1);
                }
        } else {
                current->stime++;
                if (current->pid)
                        kstat.cpu_system++;
                if (prof_buffer && current->pid) {
                        extern int _stext;
                        unsigned long ip = instruction_pointer(regs);
                        ip -= (unsigned long) &_stext;
                        ip >>= prof_shift;
                        if (ip < prof_len)
                                prof_buffer[ip]++;
                }
        }
        /*
         * check the cpu time limit on the process.
         */
        if ((current->rlim[RLIMIT_CPU].rlim_max != RLIM_INFINITY) &&
            (((current->stime + current->utime) / HZ) >= current->rlim[RLIMIT_CPU].rlim_max))
                send_sig(SIGKILL, current, 1);
        if ((current->rlim[RLIMIT_CPU].rlim_cur != RLIM_INFINITY) &&
            (((current->stime + current->utime) % HZ) == 0)) {
                psecs = (current->stime + current->utime) / HZ;
                /* send when equal */
                if (psecs == current->rlim[RLIMIT_CPU].rlim_cur)
                        send_sig(SIGXCPU, current, 1);
                /* and every five seconds thereafter. */
                else if ((psecs > current->rlim[RLIMIT_CPU].rlim_cur) &&
                        ((psecs - current->rlim[RLIMIT_CPU].rlim_cur) % 5) == 0)
                        send_sig(SIGXCPU, current, 1);
        }

        if (current->pid && 0 > --current->counter) {
                current->counter = 0;
                need_resched = 1;
        }
        /* Update ITIMER_PROF for the current task */
        if (current->it_prof_value && !(--current->it_prof_value)) {
                current->it_prof_value = current->it_prof_incr;
                send_sig(SIGPROF,current,1);
        }
        for (mask = 1, tp = timer_table+0 ; mask ; tp++,mask += mask) {
                if (mask > timer_active)
                        break;
                if (!(mask & timer_active))
                        continue;
                if (tp->expires > jiffies)
                        continue;
                mark_bh(TIMER_BH);
        }
        cli();
        if (timer_head.next->expires <= jiffies)
                mark_bh(TIMER_BH);
        if (tq_timer != &tq_last)
                mark_bh(TQUEUE_BH);
        sti();
}

asmlinkage unsigned int sys_alarm(unsigned int seconds)
{
        struct itimerval it_new, it_old;
        unsigned int oldalarm;

        it_new.it_interval.tv_sec = it_new.it_interval.tv_usec = 0;
        it_new.it_value.tv_sec = seconds;
        it_new.it_value.tv_usec = 0;
        _setitimer(ITIMER_REAL, &it_new, &it_old);
        oldalarm = it_old.it_value.tv_sec;
        /* ehhh.. We can't return 0 if an alarm is still pending.. */
        /* And better to return too much than too little anyway */
        if (it_old.it_value.tv_usec)
                oldalarm++;
        return oldalarm;
}
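
/*
 * Illustration (not part of the kernel source): from user space,
 * sys_alarm() and sys_pause() back the classic one-shot timeout idiom.
 * This is plain POSIX usage, nothing specific to this kernel:
 */
#include <stdio.h>
#include <signal.h>
#include <unistd.h>

static void on_alarm(int sig)
{
        (void) sig;     /* interrupting pause() is the whole point */
}

int main(void)
{
        signal(SIGALRM, on_alarm);
        alarm(2);       /* sys_alarm(): ITIMER_REAL fires in 2 seconds */
        pause();        /* sys_pause(): sleep until a signal arrives */
        printf("woken by SIGALRM after roughly 2 seconds\n");
        return 0;
}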

asmlinkage int sys_getpid(void)
{
        return current->pid;
}

asmlinkage int sys_getppid(void)
{
        return current->p_opptr->pid;
}

asmlinkage int sys_getuid(void)
{
        return current->uid;
}

asmlinkage int sys_geteuid(void)
{
        return current->euid;
}

asmlinkage int sys_getgid(void)
{
        return current->gid;
}

asmlinkage int sys_getegid(void)
{
        return current->egid;
}

asmlinkage int sys_nice(int increment)
{
        unsigned long newprio;
        int increase = 0;

        newprio = increment;
        if (increment < 0) {
                if (!suser())
                        return -EPERM;
                newprio = -increment;
                increase = 1;
        }
        if (newprio > 40)
                newprio = 40;
        /*
         * do a "normalization" of the priority (traditionally
         * Unix nice values are -20..20; Linux doesn't really
         * use that kind of thing, but uses the length of the
         * timeslice instead (default 150 msec). The rounding is
         * why we want to avoid negative values.
         */
        newprio = (newprio * DEF_PRIORITY + 10) / 20;
        increment = newprio;
        if (increase)
                increment = -increment;
        newprio = current->priority - increment;
        if (newprio < 1)
                newprio = 1;
        if (newprio > DEF_PRIORITY*2)
                newprio = DEF_PRIORITY*2;
        current->priority = newprio;
        return 0;
}
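
/*
 * Illustration (not part of the kernel source): the normalization above,
 * replayed in user space. DEF_PRIORITY = 20 is an assumption (the usual
 * value at HZ=100); with it, priorities span 1..40 around a default of 20.
 */
#include <stdio.h>

#define DEF_PRIORITY 20         /* assumed; see <linux/sched.h> */

static long demo_nice(long priority, int increment)
{
        unsigned long newprio = increment < 0 ? -increment : increment;
        int increase = increment < 0;

        if (newprio > 40)
                newprio = 40;
        /* map the -40..40 nice range onto the timeslice scale */
        newprio = (newprio * DEF_PRIORITY + 10) / 20;
        priority -= increase ? -(long)newprio : (long)newprio;
        if (priority < 1)
                priority = 1;
        if (priority > DEF_PRIORITY*2)
                priority = DEF_PRIORITY*2;
        return priority;
}

int main(void)
{
        /* nice(+10): (10*20+10)/20 = 10, so 20 - 10 = 10 */
        printf("nice +10 -> priority %ld\n", demo_nice(DEF_PRIORITY, 10));
        /* nice(-20), root only: 20 + 20 = 40 = DEF_PRIORITY*2 (the cap) */
        printf("nice -20 -> priority %ld\n", demo_nice(DEF_PRIORITY, -20));
        return 0;
}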

static void show_task(int nr,struct task_struct * p)
{
        unsigned long free;
        static const char * stat_nam[] = { "R", "S", "D", "Z", "T", "W" };

        printk("%-8s %3d ", p->comm, (p == current) ? -nr : nr);
        if (((unsigned) p->state) < sizeof(stat_nam)/sizeof(char *))
                printk(stat_nam[p->state]);
        else
                printk(" ");
#if ((~0UL) == 0xffffffff)
        if (p == current)
                printk(" current  ");
        else
                printk(" %08lX ", thread_saved_pc(&p->tss));
#else
        if (p == current)
                printk("   current task   ");
        else
                printk(" %016lx ", thread_saved_pc(&p->tss));
#endif
        for (free = 1; free < PAGE_SIZE/sizeof(long) ; free++) {
                if (((unsigned long *)p->kernel_stack_page)[free])
                        break;
        }
        printk("%5lu %5d %6d ", free*sizeof(long), p->pid, p->p_pptr->pid);
        if (p->p_cptr)
                printk("%5d ", p->p_cptr->pid);
        else
                printk("      ");
        if (p->p_ysptr)
                printk("%7d", p->p_ysptr->pid);
        else
                printk("       ");
        if (p->p_osptr)
                printk(" %5d\n", p->p_osptr->pid);
        else
                printk("\n");
}

void show_state(void)
{
        int i;

#if ((~0UL) == 0xffffffff)
        printk("\n"
               "                         free                        sibling\n");
        printk("  task             PC    stack   pid father child younger older\n");
#else
        printk("\n"
               "                                 free                        sibling\n");
        printk("  task                 PC        stack   pid father child younger older\n");
#endif
        for (i=0 ; i<NR_TASKS ; i++)
                if (task[i])
                        show_task(i,task[i]);
}

void sched_init(void)
{
        /*
         *      We have to do a little magic to get the first
         *      process right in SMP mode.
         */
        int cpu = smp_processor_id();

        current_set[cpu] = &init_task;
#ifdef __SMP__
        init_task.processor = cpu;
#endif
        bh_base[TIMER_BH].routine = timer_bh;
        bh_base[TQUEUE_BH].routine = tqueue_bh;
        bh_base[IMMEDIATE_BH].routine = immediate_bh;
        enable_bh(TIMER_BH);
        enable_bh(TQUEUE_BH);
        enable_bh(IMMEDIATE_BH);
}
