root/kernel/sched.c


DEFINITIONS

This source file includes the following definitions:
  1. add_to_runqueue
  2. del_from_runqueue
  3. wake_up_process
  4. process_timeout
  5. goodness
  6. schedule
  7. sys_pause
  8. wake_up
  9. wake_up_interruptible
  10. __down
  11. __sleep_on
  12. interruptible_sleep_on
  13. sleep_on
  14. add_timer
  15. del_timer
  16. count_active_tasks
  17. calc_load
  18. second_overflow
  19. timer_bh
  20. tqueue_bh
  21. immediate_bh
  22. do_timer
  23. sys_alarm
  24. sys_getpid
  25. sys_getppid
  26. sys_getuid
  27. sys_geteuid
  28. sys_getgid
  29. sys_getegid
  30. sys_nice
  31. show_task
  32. show_state
  33. sched_init

/*
 *  linux/kernel/sched.c
 *
 *  Copyright (C) 1991, 1992  Linus Torvalds
 */

/*
 * 'sched.c' is the main kernel file. It contains scheduling primitives
 * (sleep_on, wakeup, schedule etc) as well as a number of simple system
 * call functions (of type getpid(), which just extract a field from
 * the current task).
 */

#include <linux/config.h>
#include <linux/signal.h>
#include <linux/sched.h>
#include <linux/timer.h>
#include <linux/kernel.h>
#include <linux/kernel_stat.h>
#include <linux/fdreg.h>
#include <linux/errno.h>
#include <linux/time.h>
#include <linux/ptrace.h>
#include <linux/delay.h>
#include <linux/interrupt.h>
#include <linux/tqueue.h>
#include <linux/resource.h>
#include <linux/mm.h>
#include <linux/smp.h>

#include <asm/system.h>
#include <asm/io.h>
#include <asm/segment.h>
#include <asm/pgtable.h>

#include <linux/timex.h>

/*
 * kernel variables
 */
long tick = 1000000 / HZ;               /* timer interrupt period */
volatile struct timeval xtime;          /* The current time */
int tickadj = 500/HZ;                   /* microsecs */

DECLARE_TASK_QUEUE(tq_timer);
DECLARE_TASK_QUEUE(tq_immediate);
DECLARE_TASK_QUEUE(tq_scheduler);

/*
 * phase-lock loop variables
 */
int time_state = TIME_BAD;     /* clock synchronization status */
int time_status = STA_UNSYNC;   /* clock status bits */
long time_offset = 0;           /* time adjustment (us) */
long time_constant = 0;         /* pll time constant */
long time_tolerance = MAXFREQ;  /* frequency tolerance (ppm) */
long time_precision = 1;        /* clock precision (us) */
long time_maxerror = 0x70000000;/* maximum error */
long time_esterror = 0x70000000;/* estimated error */
long time_phase = 0;            /* phase offset (scaled us) */
long time_freq = 0;             /* frequency offset (scaled ppm) */
long time_adj = 0;              /* tick adjust (scaled 1 / HZ) */
long time_reftime = 0;          /* time at last adjustment (s) */

long time_adjust = 0;
long time_adjust_step = 0;

int need_resched = 0;
unsigned long event = 0;

extern int _setitimer(int, struct itimerval *, struct itimerval *);
unsigned long * prof_buffer = NULL;
unsigned long prof_len = 0;
unsigned long prof_shift = 0;

#define _S(nr) (1<<((nr)-1))

extern void mem_use(void);

static unsigned long init_kernel_stack[1024] = { STACK_MAGIC, };
unsigned long init_user_stack[1024] = { STACK_MAGIC, };
static struct vm_area_struct init_mmap = INIT_MMAP;
static struct fs_struct init_fs = INIT_FS;
static struct files_struct init_files = INIT_FILES;
static struct signal_struct init_signals = INIT_SIGNALS;

struct mm_struct init_mm = INIT_MM;
struct task_struct init_task = INIT_TASK;

unsigned long volatile jiffies=0;

struct task_struct *current_set[NR_CPUS];
struct task_struct *last_task_used_math = NULL;

struct task_struct * task[NR_TASKS] = {&init_task, };

struct kernel_stat kstat = { 0 };

static inline void add_to_runqueue(struct task_struct * p)
{
#if 1   /* sanity tests */
        if (p->next_run || p->prev_run) {
                printk("task already on run-queue\n");
                return;
        }
#endif
        if (p->counter > current->counter + 3)
                need_resched = 1;
        nr_running++;
        (p->next_run = init_task.next_run)->prev_run = p;
        p->prev_run = &init_task;
        init_task.next_run = p;
}

static inline void del_from_runqueue(struct task_struct * p)
{
        struct task_struct *next = p->next_run;
        struct task_struct *prev = p->prev_run;

#if 1   /* sanity tests */
        if (!next || !prev) {
                printk("task not on run-queue\n");
                return;
        }
#endif
        if (p == &init_task) {
                static int nr = 0;
                if (nr < 5) {
                        nr++;
                        printk("idle task may not sleep\n");
                }
                return;
        }
        nr_running--;
        next->prev_run = prev;
        prev->next_run = next;
        p->next_run = NULL;
        p->prev_run = NULL;
}

/*
 * Wake up a process. Put it on the run-queue if it's not
 * already there.  The "current" process is always on the
 * run-queue (except when the actual re-schedule is in
 * progress), and as such you're allowed to do the simpler
 * "current->state = TASK_RUNNING" to mark yourself runnable
 * without the overhead of this.
 */
inline void wake_up_process(struct task_struct * p)
{
        unsigned long flags;

        save_flags(flags);
        cli();
        p->state = TASK_RUNNING;
        if (!p->next_run)
                add_to_runqueue(p);
        restore_flags(flags);
}

static void process_timeout(unsigned long __data)
{
        struct task_struct * p = (struct task_struct *) __data;

        p->timeout = 0;
        wake_up_process(p);
}
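
/*
 * Illustrative sketch of how process_timeout() gets used: a driver sets
 * current->timeout before sleeping, and schedule() arms a timer_list
 * whose handler is process_timeout(), so the task is woken either by an
 * explicit wake_up() or when the timeout expires, whichever comes first.
 * 'foo_wait' here is a hypothetical wait-queue:
 *
 *      current->timeout = jiffies + HZ;        -- give up after ~1 second
 *      interruptible_sleep_on(&foo_wait);
 *      if (!current->timeout)
 *              ...                             -- process_timeout() fired and
 *                                                 cleared ->timeout: we timed out
 */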

/*
 * This is the function that decides how desirable a process is..
 * You can weigh different processes against each other depending
 * on what CPU they've run on lately etc to try to handle cache
 * and TLB miss penalties.
 *
 * Return values:
 *       -1000: never select this
 *           0: out of time, recalculate counters (but it might still be
 *              selected)
 *         +ve: "goodness" value (the larger, the better)
 *       +1000: realtime process, select this.
 */
static inline int goodness(struct task_struct * p, int this_cpu)
{
        int weight;

#ifdef CONFIG_SMP
        /* We are not permitted to run a task someone else is running */
        if (p->processor != NO_PROC_ID)
                return -1000;
#endif

        /*
         * Give the process a first-approximation goodness value
         * according to the number of clock-ticks it has left.
         *
         * Don't do any other calculations if the time slice is
         * over..
         */
        weight = p->counter;
        if (weight) {

#ifdef CONFIG_SMP
                /* Give a largish advantage to the same processor...   */
                /* (this is equivalent to penalizing other processors) */
                if (p->last_processor == this_cpu)
                        weight += PROC_CHANGE_PENALTY;
#endif

                /* .. and a slight advantage to the current process */
                if (p == current)
                        weight += 1;
        }

        return weight;
}
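
/*
 * A worked example of the weighting (assuming PROC_CHANGE_PENALTY is 20,
 * as on i386 SMP): a task with 5 ticks left that last ran on this CPU
 * scores 5 + 20 = 25, the same task fresh from another CPU scores 5, and
 * the current process with 5 ticks left scores 5 + 1 = 6.  A task whose
 * slice is used up scores 0, so if every runnable task scores 0 the
 * caller recomputes all counters before picking again.
 */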

/*
 *  'schedule()' is the scheduler function. It's a very simple and nice
 * scheduler: it's not perfect, but certainly works for most things.
 *
 * The goto is "interesting".
 *
 *   NOTE!!  Task 0 is the 'idle' task, which gets called when no other
 * tasks can run. It cannot be killed, and it cannot sleep. The 'state'
 * information in task[0] is never used.
 */
asmlinkage void schedule(void)
{
        int c;
        struct task_struct * p;
        struct task_struct * next;
        unsigned long timeout = 0;
        int this_cpu=smp_processor_id();

/* check alarm, wake up any interruptible tasks that have got a signal */

        if (intr_count) {
                printk("Aiee: scheduling in interrupt\n");
                return;
        }
        run_task_queue(&tq_scheduler);

        need_resched = 0;
        cli();
        switch (current->state) {
                case TASK_INTERRUPTIBLE:
                        if (current->signal & ~current->blocked)
                                goto makerunnable;
                        timeout = current->timeout;
                        if (timeout && (timeout <= jiffies)) {
                                current->timeout = 0;
                                timeout = 0;
                makerunnable:
                                current->state = TASK_RUNNING;
                                break;
                        }
                default:
                        del_from_runqueue(current);
                case TASK_RUNNING:
        }
        p = init_task.next_run;
        sti();

#ifdef CONFIG_SMP
        /*
         *      This is safe as we do not permit re-entry of schedule()
         */
        current->processor = NO_PROC_ID;
#endif

/*
 * Note! there may appear new tasks on the run-queue during this, as
 * interrupts are enabled. However, they will be put on front of the
 * list, so our list starting at "p" is essentially fixed.
 */
/* this is the scheduler proper: */
        c = -1000;
        next = &init_task;
        while (p != &init_task) {
                int weight = goodness(p, this_cpu);
                if (weight > c)
                        c = weight, next = p;
                p = p->next_run;
        }

        /* if all runnable processes have "counter == 0", re-calculate counters */
        if (!c) {
                for_each_task(p)
                        p->counter = (p->counter >> 1) + p->priority;
        }
#ifdef CONFIG_SMP

        /*
         *      Context switching between two idle threads is pointless.
         */
        if(!current->pid && !next->pid)
                next=current;
        /*
         *      Allocate process to CPU
         */

         next->processor = this_cpu;
         next->last_processor = this_cpu;

#endif
        if (current != next) {
                struct timer_list timer;

                kstat.context_swtch++;
                if (timeout) {
                        init_timer(&timer);
                        timer.expires = timeout;
                        timer.data = (unsigned long) current;
                        timer.function = process_timeout;
                        add_timer(&timer);
                }
                switch_to(next);
                if (timeout)
                        del_timer(&timer);
        }
}

asmlinkage int sys_pause(void)
{
        current->state = TASK_INTERRUPTIBLE;
        schedule();
        return -ERESTARTNOHAND;
}

/*
 * wake_up doesn't wake up stopped processes - they have to be awakened
 * with signals or similar.
 *
 * Note that this doesn't need cli-sti pairs: interrupts may not change
 * the wait-queue structures directly, but only call wake_up() to wake
 * a process. The process itself must remove itself from the queue once
 * it has woken.
 */
void wake_up(struct wait_queue **q)
{
        struct wait_queue *tmp;
        struct task_struct * p;

        if (!q || !(tmp = *q))
                return;
        do {
                if ((p = tmp->task) != NULL) {
                        if ((p->state == TASK_UNINTERRUPTIBLE) ||
                            (p->state == TASK_INTERRUPTIBLE))
                                wake_up_process(p);
                }
                if (!tmp->next) {
                        printk("wait_queue is bad (eip = %p)\n",
                                __builtin_return_address(0));
                        printk("        q = %p\n",q);
                        printk("       *q = %p\n",*q);
                        printk("      tmp = %p\n",tmp);
                        break;
                }
                tmp = tmp->next;
        } while (tmp != *q);
}

void wake_up_interruptible(struct wait_queue **q)
{
        struct wait_queue *tmp;
        struct task_struct * p;

        if (!q || !(tmp = *q))
                return;
        do {
                if ((p = tmp->task) != NULL) {
                        if (p->state == TASK_INTERRUPTIBLE)
                                wake_up_process(p);
                }
                if (!tmp->next) {
                        printk("wait_queue is bad (eip = %p)\n",
                                __builtin_return_address(0));
                        printk("        q = %p\n",q);
                        printk("       *q = %p\n",*q);
                        printk("      tmp = %p\n",tmp);
                        break;
                }
                tmp = tmp->next;
        } while (tmp != *q);
}

void __down(struct semaphore * sem)
{
        struct wait_queue wait = { current, NULL };
        add_wait_queue(&sem->wait, &wait);
        current->state = TASK_UNINTERRUPTIBLE;
        while (sem->count <= 0) {
                schedule();
                current->state = TASK_UNINTERRUPTIBLE;
        }
        current->state = TASK_RUNNING;
        remove_wait_queue(&sem->wait, &wait);
}
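
/*
 * A minimal usage sketch: __down() is only the slow path, entered from
 * the down() fast path when the semaphore is already held.  Assuming the
 * usual MUTEX initializer (count == 1) and a hypothetical driver lock:
 *
 *      static struct semaphore foo_sem = MUTEX;
 *
 *      down(&foo_sem);         -- sleeps in __down() while count <= 0
 *      ...critical section...
 *      up(&foo_sem);           -- raises count and wakes a sleeper
 */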

static inline void __sleep_on(struct wait_queue **p, int state)
{
        unsigned long flags;
        struct wait_queue wait = { current, NULL };

        if (!p)
                return;
        if (current == task[0])
                panic("task[0] trying to sleep");
        current->state = state;
        add_wait_queue(p, &wait);
        save_flags(flags);
        sti();
        schedule();
        remove_wait_queue(p, &wait);
        restore_flags(flags);
}

void interruptible_sleep_on(struct wait_queue **p)
{
        __sleep_on(p,TASK_INTERRUPTIBLE);
}

void sleep_on(struct wait_queue **p)
{
        __sleep_on(p,TASK_UNINTERRUPTIBLE);
}
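
/*
 * The classic pattern these primitives support, sketched with a
 * hypothetical wait-queue 'foo_wait' and condition 'foo_ready'.  The
 * condition is re-tested in a loop, since a wake-up only means "look
 * again", not "the condition now holds":
 *
 *      static struct wait_queue * foo_wait = NULL;
 *
 *      while (!foo_ready)              -- sleeper side
 *              sleep_on(&foo_wait);
 *
 *      foo_ready = 1;                  -- waker (e.g. interrupt) side
 *      wake_up(&foo_wait);
 */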

/*
 * The head for the timer-list has an "expires" field of MAX_UINT,
 * and the sorting routine counts on this..
 */
static struct timer_list timer_head = { &timer_head, &timer_head, ~0, 0, NULL };
#define SLOW_BUT_DEBUGGING_TIMERS 1

void add_timer(struct timer_list * timer)
{
        unsigned long flags;
        struct timer_list *p;

#if SLOW_BUT_DEBUGGING_TIMERS
        if (timer->next || timer->prev) {
                printk("add_timer() called with non-zero list from %p\n",
                        __builtin_return_address(0));
                return;
        }
#endif
        p = &timer_head;
        save_flags(flags);
        cli();
        do {
                p = p->next;
        } while (timer->expires > p->expires);
        timer->next = p;
        timer->prev = p->prev;
        p->prev = timer;
        timer->prev->next = timer;
        restore_flags(flags);
}

int del_timer(struct timer_list * timer)
{
        unsigned long flags;
#if SLOW_BUT_DEBUGGING_TIMERS
        struct timer_list * p;

        p = &timer_head;
        save_flags(flags);
        cli();
        while ((p = p->next) != &timer_head) {
                if (p == timer) {
                        timer->next->prev = timer->prev;
                        timer->prev->next = timer->next;
                        timer->next = timer->prev = NULL;
                        restore_flags(flags);
                        return 1;
                }
        }
        if (timer->next || timer->prev)
                printk("del_timer() called from %p with timer not initialized\n",
                        __builtin_return_address(0));
        restore_flags(flags);
        return 0;
#else
        save_flags(flags);
        cli();
        if (timer->next) {
                timer->next->prev = timer->prev;
                timer->prev->next = timer->next;
                timer->next = timer->prev = NULL;
                restore_flags(flags);
                return 1;
        }
        restore_flags(flags);
        return 0;
#endif
}
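
/*
 * A minimal usage sketch for the sorted timer list above; 'foo_timer'
 * and foo_expired() are hypothetical.  Expiry times are absolute
 * jiffies, and the handler runs from timer_bh() with interrupts enabled:
 *
 *      static struct timer_list foo_timer;
 *
 *      init_timer(&foo_timer);                 -- next/prev must be NULL
 *      foo_timer.expires = jiffies + 2*HZ;     -- fire in about 2 seconds
 *      foo_timer.data = (unsigned long) dev;
 *      foo_timer.function = foo_expired;
 *      add_timer(&foo_timer);
 *      ...
 *      del_timer(&foo_timer);                  -- returns 1 if still pending
 */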

unsigned long timer_active = 0;
struct timer_struct timer_table[32];

/*
 * Hmm.. Changed this, as the GNU make sources (load.c) seem to
 * imply that avenrun[] is the standard name for this kind of thing.
 * Nothing else seems to be standardized: the fractional size etc
 * all seem to differ on different machines.
 */
unsigned long avenrun[3] = { 0,0,0 };

/*
 * Nr of active tasks - counted in fixed-point numbers
 */
static unsigned long count_active_tasks(void)
{
        struct task_struct **p;
        unsigned long nr = 0;

        for(p = &LAST_TASK; p > &FIRST_TASK; --p)
                if (*p && ((*p)->state == TASK_RUNNING ||
                           (*p)->state == TASK_UNINTERRUPTIBLE ||
                           (*p)->state == TASK_SWAPPING))
                        nr += FIXED_1;
#ifdef CONFIG_SMP
        nr-=(smp_num_cpus-1)*FIXED_1;
#endif
        return nr;
}

static inline void calc_load(void)
{
        unsigned long active_tasks; /* fixed-point */
        static int count = LOAD_FREQ;

        if (count-- > 0)
                return;
        count = LOAD_FREQ;
        active_tasks = count_active_tasks();
        CALC_LOAD(avenrun[0], EXP_1, active_tasks);
        CALC_LOAD(avenrun[1], EXP_5, active_tasks);
        CALC_LOAD(avenrun[2], EXP_15, active_tasks);
}
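
/*
 * The fixed-point arithmetic behind CALC_LOAD, from its definition in
 * <linux/sched.h>: with FSHIFT == 11 bits (so FIXED_1 == 2048) and
 * EXP_1 == 1884 (about 2048/e^(5/60)), every LOAD_FREQ (5 seconds) it
 * computes
 *
 *      load = (load*EXP_1 + active_tasks*(FIXED_1 - EXP_1)) >> FSHIFT;
 *
 * i.e. an exponentially-decaying average of the active-task count.  Fed
 * a steady three tasks (active_tasks == 3*FIXED_1), avenrun[0] converges
 * on 3*FIXED_1, which /proc/loadavg reports as 3.00.
 */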

/*
 * this routine handles the overflow of the microsecond field
 *
 * The tricky bits of code to handle the accurate clock support
 * were provided by Dave Mills (Mills@UDEL.EDU) of NTP fame.
 * They were originally developed for SUN and DEC kernels.
 * All the kudos should go to Dave for this stuff.
 *
 */
static void second_overflow(void)
{
    long ltemp;

    /* Bump the maxerror field */
    time_maxerror = (0x70000000-time_maxerror <
                     time_tolerance >> SHIFT_USEC) ?
        0x70000000 : (time_maxerror + (time_tolerance >> SHIFT_USEC));

    /*
     * Leap second processing. If in leap-insert state at
     * the end of the day, the system clock is set back one
     * second; if in leap-delete state, the system clock is
     * set ahead one second. The microtime() routine or
     * external clock driver will ensure that reported time
     * is always monotonic. The ugly divides should be
     * replaced.
     */
    switch (time_state) {

    case TIME_OK:
        if (time_status & STA_INS)
            time_state = TIME_INS;
        else if (time_status & STA_DEL)
            time_state = TIME_DEL;
        break;

    case TIME_INS:
        if (xtime.tv_sec % 86400 == 0) {
            xtime.tv_sec--;
            time_state = TIME_OOP;
            printk("Clock: inserting leap second 23:59:60 UTC\n");
        }
        break;

    case TIME_DEL:
        if ((xtime.tv_sec + 1) % 86400 == 0) {
            xtime.tv_sec++;
            time_state = TIME_WAIT;
            printk("Clock: deleting leap second 23:59:59 UTC\n");
        }
        break;

    case TIME_OOP:

        time_state = TIME_WAIT;
        break;

    case TIME_WAIT:
        if (!(time_status & (STA_INS | STA_DEL)))
            time_state = TIME_OK;
    }

    /*
     * Compute the phase adjustment for the next second. In
     * PLL mode, the offset is reduced by a fixed factor
     * times the time constant. In FLL mode the offset is
     * used directly. In either mode, the maximum phase
     * adjustment for each second is clamped so as to spread
     * the adjustment over not more than the number of
     * seconds between updates.
     */
    if (time_offset < 0) {
        ltemp = -time_offset;
        if (!(time_status & STA_FLL))
            ltemp >>= SHIFT_KG + time_constant;
        if (ltemp > (MAXPHASE / MINSEC) << SHIFT_UPDATE)
            ltemp = (MAXPHASE / MINSEC) <<
                SHIFT_UPDATE;
        time_offset += ltemp;
        time_adj = -ltemp << (SHIFT_SCALE - SHIFT_HZ -
                              SHIFT_UPDATE);
    } else {
        ltemp = time_offset;
        if (!(time_status & STA_FLL))
            ltemp >>= SHIFT_KG + time_constant;
        if (ltemp > (MAXPHASE / MINSEC) << SHIFT_UPDATE)
            ltemp = (MAXPHASE / MINSEC) <<
                SHIFT_UPDATE;
        time_offset -= ltemp;
        time_adj = ltemp << (SHIFT_SCALE - SHIFT_HZ -
                             SHIFT_UPDATE);
    }

    /*
     * Compute the frequency estimate and additional phase
     * adjustment due to frequency error for the next
     * second. When the PPS signal is engaged, gnaw on the
     * watchdog counter and update the frequency computed by
     * the pll and the PPS signal.
     */
    pps_valid++;
    if (pps_valid == PPS_VALID) {
        pps_jitter = MAXTIME;
        pps_stabil = MAXFREQ;
        time_status &= ~(STA_PPSSIGNAL | STA_PPSJITTER |
                         STA_PPSWANDER | STA_PPSERROR);
    }
    ltemp = time_freq + pps_freq;
    if (ltemp < 0)
        time_adj -= -ltemp >>
            (SHIFT_USEC + SHIFT_HZ - SHIFT_SCALE);
    else
        time_adj += ltemp >>
            (SHIFT_USEC + SHIFT_HZ - SHIFT_SCALE);

#if HZ == 100
    /* compensate for (HZ==100) != 128. Add 25% to get 125; => only 3% error */
    if (time_adj < 0)
        time_adj -= -time_adj >> 2;
    else
        time_adj += time_adj >> 2;
#endif
}

/*
 * disregard lost ticks for now.. We don't care enough.
 */
static void timer_bh(void * unused)
{
        unsigned long mask;
        struct timer_struct *tp;
        struct timer_list * timer;

        cli();
        while ((timer = timer_head.next) != &timer_head && timer->expires <= jiffies) {
                void (*fn)(unsigned long) = timer->function;
                unsigned long data = timer->data;
                timer->next->prev = timer->prev;
                timer->prev->next = timer->next;
                timer->next = timer->prev = NULL;
                sti();
                fn(data);
                cli();
        }
        sti();

        for (mask = 1, tp = timer_table+0 ; mask ; tp++,mask += mask) {
                if (mask > timer_active)
                        break;
                if (!(mask & timer_active))
                        continue;
                if (tp->expires > jiffies)
                        continue;
                timer_active &= ~mask;
                tp->fn();
                sti();
        }
}

void tqueue_bh(void * unused)
{
        run_task_queue(&tq_timer);
}

void immediate_bh(void * unused)
{
        run_task_queue(&tq_immediate);
}

void do_timer(struct pt_regs * regs)
{
        unsigned long mask;
        struct timer_struct *tp;
        long ltemp, psecs;

        /* Advance the phase; once it accumulates to a whole microsecond,
         * fold that much extra into the tick.
         */
        time_phase += time_adj;
        if (time_phase <= -FINEUSEC) {
                ltemp = -time_phase >> SHIFT_SCALE;
                time_phase += ltemp << SHIFT_SCALE;
                xtime.tv_usec += tick + time_adjust_step - ltemp;
        }
        else if (time_phase >= FINEUSEC) {
                ltemp = time_phase >> SHIFT_SCALE;
                time_phase -= ltemp << SHIFT_SCALE;
                xtime.tv_usec += tick + time_adjust_step + ltemp;
        } else
                xtime.tv_usec += tick + time_adjust_step;

        if (time_adjust) {
            /* We are doing an adjtime thing.
             *
             * Modify the value of the tick for next time.
             * Note that a positive delta means we want the clock
             * to run fast. This means that the tick should be bigger
             *
             * Limit the amount of the step for *next* tick to be
             * in the range -tickadj .. +tickadj
             */
             if (time_adjust > tickadj)
               time_adjust_step = tickadj;
             else if (time_adjust < -tickadj)
               time_adjust_step = -tickadj;
             else
               time_adjust_step = time_adjust;

            /* Reduce by this step the amount of time left  */
            time_adjust -= time_adjust_step;
        }
        else
            time_adjust_step = 0;

        if (xtime.tv_usec >= 1000000) {
            xtime.tv_usec -= 1000000;
            xtime.tv_sec++;
            second_overflow();
        }

        jiffies++;
        calc_load();
        if (user_mode(regs)) {
                current->utime++;
                if (current->pid) {
                        if (current->priority < DEF_PRIORITY)
                                kstat.cpu_nice++;
                        else
                                kstat.cpu_user++;
                }
                /* Update ITIMER_VIRT for current task if not in a system call */
                if (current->it_virt_value && !(--current->it_virt_value)) {
                        current->it_virt_value = current->it_virt_incr;
                        send_sig(SIGVTALRM,current,1);
                }
        } else {
                current->stime++;
                if(current->pid)
                        kstat.cpu_system++;
                if (prof_buffer && current->pid) {
                        extern int _stext;
                        unsigned long ip = instruction_pointer(regs);
                        ip -= (unsigned long) &_stext;
                        ip >>= prof_shift;
                        if (ip < prof_len)
                                prof_buffer[ip]++;
                }
        }
        /*
         * check the cpu time limit on the process.
         */
        if ((current->rlim[RLIMIT_CPU].rlim_max != RLIM_INFINITY) &&
            (((current->stime + current->utime) / HZ) >= current->rlim[RLIMIT_CPU].rlim_max))
                send_sig(SIGKILL, current, 1);
        if ((current->rlim[RLIMIT_CPU].rlim_cur != RLIM_INFINITY) &&
            (((current->stime + current->utime) % HZ) == 0)) {
                psecs = (current->stime + current->utime) / HZ;
                /* send when equal */
                if (psecs == current->rlim[RLIMIT_CPU].rlim_cur)
                        send_sig(SIGXCPU, current, 1);
                /* and every five seconds thereafter. */
                else if ((psecs > current->rlim[RLIMIT_CPU].rlim_cur) &&
                        ((psecs - current->rlim[RLIMIT_CPU].rlim_cur) % 5) == 0)
                        send_sig(SIGXCPU, current, 1);
        }

        if (current->pid && 0 > --current->counter) {
                current->counter = 0;
                need_resched = 1;
        }
        /* Update ITIMER_PROF for the current task */
        if (current->it_prof_value && !(--current->it_prof_value)) {
                current->it_prof_value = current->it_prof_incr;
                send_sig(SIGPROF,current,1);
        }
        for (mask = 1, tp = timer_table+0 ; mask ; tp++,mask += mask) {
                if (mask > timer_active)
                        break;
                if (!(mask & timer_active))
                        continue;
                if (tp->expires > jiffies)
                        continue;
                mark_bh(TIMER_BH);
        }
        cli();
        if (timer_head.next->expires <= jiffies)
                mark_bh(TIMER_BH);
        if (tq_timer != &tq_last)
                mark_bh(TQUEUE_BH);
        sti();
}
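
/*
 * A worked example of the adjtime stepping above, assuming HZ == 100
 * (so tick == 10000 us and tickadj == 500/HZ == 5 us): a request to slew
 * the clock forward by 1000 us sets time_adjust = 1000, each tick then
 * adds time_adjust_step == 5 us on top of the normal 10000 us, and the
 * full slew completes after 200 ticks, i.e. about two seconds, without
 * the clock ever jumping or running backwards.
 */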

asmlinkage unsigned int sys_alarm(unsigned int seconds)
{
        struct itimerval it_new, it_old;
        unsigned int oldalarm;

        it_new.it_interval.tv_sec = it_new.it_interval.tv_usec = 0;
        it_new.it_value.tv_sec = seconds;
        it_new.it_value.tv_usec = 0;
        _setitimer(ITIMER_REAL, &it_new, &it_old);
        oldalarm = it_old.it_value.tv_sec;
        /* ehhh.. We can't return 0 if we have an alarm pending.. */
        /* And we'd better return too much than too little anyway */
        if (it_old.it_value.tv_usec)
                oldalarm++;
        return oldalarm;
}

asmlinkage int sys_getpid(void)
{
        return current->pid;
}

asmlinkage int sys_getppid(void)
{
        return current->p_opptr->pid;
}

asmlinkage int sys_getuid(void)
{
        return current->uid;
}

asmlinkage int sys_geteuid(void)
{
        return current->euid;
}

asmlinkage int sys_getgid(void)
{
        return current->gid;
}

asmlinkage int sys_getegid(void)
{
        return current->egid;
}

asmlinkage int sys_nice(int increment)
{
        unsigned long newprio;
        int increase = 0;

        newprio = increment;
        if (increment < 0) {
                if (!suser())
                        return -EPERM;
                newprio = -increment;
                increase = 1;
        }
        if (newprio > 40)
                newprio = 40;
        /*
         * do a "normalization" of the priority (traditionally
         * unix nice values are -20..20, linux doesn't really
         * use that kind of thing, but uses the length of the
         * timeslice instead (default 150 msec). The rounding is
         * why we want to avoid negative values.
         */
        newprio = (newprio * DEF_PRIORITY + 10) / 20;
        increment = newprio;
        if (increase)
                increment = -increment;
        newprio = current->priority - increment;
        if (newprio < 1)
                newprio = 1;
        if (newprio > DEF_PRIORITY*2)
                newprio = DEF_PRIORITY*2;
        current->priority = newprio;
        return 0;
}
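
/*
 * A worked example of the normalization above, assuming DEF_PRIORITY is
 * 20 ticks (HZ == 100): nice(10) maps the increment to
 * (10*20 + 10)/20 == 10, so a task at the default priority of 20 drops
 * to 10, i.e. timeslices half as long; nice(-10) (root only) raises it
 * to 30.  The result is always clamped to 1..DEF_PRIORITY*2.
 */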

static void show_task(int nr,struct task_struct * p)
{
        unsigned long free;
        static const char * stat_nam[] = { "R", "S", "D", "Z", "T", "W" };

        printk("%-8s %3d ", p->comm, (p == current) ? -nr : nr);
        if (((unsigned) p->state) < sizeof(stat_nam)/sizeof(char *))
                printk(stat_nam[p->state]);
        else
                printk(" ");
#if ((~0UL) == 0xffffffff)
        if (p == current)
                printk(" current  ");
        else
                printk(" %08lX ", thread_saved_pc(&p->tss));
#else
        if (p == current)
                printk("   current task   ");
        else
                printk(" %016lx ", thread_saved_pc(&p->tss));
#endif
        for (free = 1; free < PAGE_SIZE/sizeof(long) ; free++) {
                if (((unsigned long *)p->kernel_stack_page)[free])
                        break;
        }
        printk("%5lu %5d %6d ", free*sizeof(long), p->pid, p->p_pptr->pid);
        if (p->p_cptr)
                printk("%5d ", p->p_cptr->pid);
        else
                printk("      ");
        if (p->p_ysptr)
                printk("%7d", p->p_ysptr->pid);
        else
                printk("       ");
        if (p->p_osptr)
                printk(" %5d\n", p->p_osptr->pid);
        else
                printk("\n");
}

void show_state(void)
{
        int i;

#if ((~0UL) == 0xffffffff)
        printk("\n"
               "                         free                        sibling\n");
        printk("  task             PC    stack   pid father child younger older\n");
#else
        printk("\n"
               "                                 free                        sibling\n");
        printk("  task                 PC        stack   pid father child younger older\n");
#endif
        for (i=0 ; i<NR_TASKS ; i++)
                if (task[i])
                        show_task(i,task[i]);
}

void sched_init(void)
{
        /*
         *      We have to do a little magic to get the first
         *      process right in SMP mode.
         */
        int cpu=smp_processor_id();
        current_set[cpu]=&init_task;
#ifdef CONFIG_SMP
        init_task.processor=cpu;
#endif
        bh_base[TIMER_BH].routine = timer_bh;
        bh_base[TQUEUE_BH].routine = tqueue_bh;
        bh_base[IMMEDIATE_BH].routine = immediate_bh;
        enable_bh(TIMER_BH);
        enable_bh(TQUEUE_BH);
        enable_bh(IMMEDIATE_BH);
}
