root/kernel/sched.c

/* [previous][next][first][last][top][bottom][index][help] */

DEFINITIONS

This source file includes the following definitions.
  1. add_to_runqueue
  2. del_from_runqueue
  3. wake_up_process
  4. process_timeout
  5. goodness
  6. schedule
  7. sys_pause
  8. wake_up
  9. wake_up_interruptible
  10. __down
  11. __sleep_on
  12. interruptible_sleep_on
  13. sleep_on
  14. add_timer
  15. del_timer
  16. count_active_tasks
  17. calc_load
  18. second_overflow
  19. timer_bh
  20. tqueue_bh
  21. immediate_bh
  22. do_timer
  23. sys_alarm
  24. sys_getpid
  25. sys_getppid
  26. sys_getuid
  27. sys_geteuid
  28. sys_getgid
  29. sys_getegid
  30. sys_nice
  31. show_task
  32. show_state
  33. sched_init

   1 /*
   2  *  linux/kernel/sched.c
   3  *
   4  *  Copyright (C) 1991, 1992  Linus Torvalds
   5  */
   6 
   7 /*
   8  * 'sched.c' is the main kernel file. It contains scheduling primitives
   9  * (sleep_on, wakeup, schedule etc) as well as a number of simple system
  10  * call functions (such as getpid(), which just extracts a field from
  11  * the current task).
  12  */
  13 
  14 #include <linux/signal.h>
  15 #include <linux/sched.h>
  16 #include <linux/timer.h>
  17 #include <linux/kernel.h>
  18 #include <linux/kernel_stat.h>
  19 #include <linux/fdreg.h>
  20 #include <linux/errno.h>
  21 #include <linux/time.h>
  22 #include <linux/ptrace.h>
  23 #include <linux/delay.h>
  24 #include <linux/interrupt.h>
  25 #include <linux/tqueue.h>
  26 #include <linux/resource.h>
  27 #include <linux/mm.h>
  28 #include <linux/smp.h>
  29 
  30 #include <asm/system.h>
  31 #include <asm/io.h>
  32 #include <asm/segment.h>
  33 #include <asm/pgtable.h>
  34 
  35 #include <linux/timex.h>
  36 
  37 /*
  38  * kernel variables
  39  */
  40 long tick = 1000000 / HZ;               /* timer interrupt period (us); do_timer() adds it to xtime.tv_usec */
  41 volatile struct timeval xtime;          /* The current time */
  42 int tickadj = 500/HZ;                   /* microsecs; do_timer() clamps each adjtime step to +/- this */
  43 
     /* tq_timer is run by tqueue_bh(), tq_immediate by immediate_bh(), tq_scheduler at the start of schedule(). */
  44 DECLARE_TASK_QUEUE(tq_timer);
  45 DECLARE_TASK_QUEUE(tq_immediate);
  46 DECLARE_TASK_QUEUE(tq_scheduler);
  47 
  48 /*
  49  * phase-lock loop variables
  50  */
  51 int time_state = TIME_BAD;     /* clock synchronization status */
  52 int time_status = STA_UNSYNC;   /* clock status bits */
  53 long time_offset = 0;           /* time adjustment (us) */
  54 long time_constant = 0;         /* pll time constant */
  55 long time_tolerance = MAXFREQ;  /* frequency tolerance (ppm) */
  56 long time_precision = 1;        /* clock precision (us) */
  57 long time_maxerror = 0x70000000;/* maximum error */
  58 long time_esterror = 0x70000000;/* estimated error */
  59 long time_phase = 0;            /* phase offset (scaled us) */
  60 long time_freq = 0;             /* frequency offset (scaled ppm) */
  61 long time_adj = 0;              /* tick adjust (scaled 1 / HZ) */
  62 long time_reftime = 0;          /* time at last adjustment (s) */
  63 
     /* Outstanding adjtime correction; do_timer() subtracts one step from it per tick. */
  64 long time_adjust = 0;
     /* Step added to the next tick; do_timer() keeps it within -tickadj .. +tickadj. */
  65 long time_adjust_step = 0;
  66 
  67 int need_resched = 0;           /* set to 1 to request a reschedule; schedule() resets it to 0 */
  68 unsigned long event = 0;
  69 
  70 extern int _setitimer(int, struct itimerval *, struct itimerval *);
     /* Profiling histogram: do_timer() increments the slot selected by (ip - &_stext) >> prof_shift. */
  71 unsigned long * prof_buffer = NULL;
  72 unsigned long prof_len = 0;
  73 unsigned long prof_shift = 0;
  74 
     /* Mask with bit (nr - 1) set, i.e. for a 1-based index nr. */
  75 #define _S(nr) (1<<((nr)-1))
  76 
  77 extern void mem_use(void);
  78 
  79 static unsigned long init_kernel_stack[1024] = { STACK_MAGIC, };
  80 unsigned long init_user_stack[1024] = { STACK_MAGIC, };
  81 static struct vm_area_struct init_mmap = INIT_MMAP;
  82 static struct fs_struct init_fs = INIT_FS;
  83 static struct files_struct init_files = INIT_FILES;
  84 static struct signal_struct init_signals = INIT_SIGNALS;
  85 
  86 struct mm_struct init_mm = INIT_MM;
  87 struct task_struct init_task = INIT_TASK;
  88 
     /* Tick counter: incremented once per do_timer() call. */
  89 unsigned long volatile jiffies=0;
  90 
  91 struct task_struct *current_set[NR_CPUS];
  92 struct task_struct *last_task_used_math = NULL;
  93 
     /* Task table; slot 0 holds init_task, the remaining slots start out NULL. */
  94 struct task_struct * task[NR_TASKS] = {&init_task, };
  95 
  96 struct kernel_stat kstat = { 0 };
  97 
  98 static inline void add_to_runqueue(struct task_struct * p)
     /* [previous][next][first][last][top][bottom][index][help] */
  99 {
 100 #if 1   /* sanity tests */
 101         if (p->next_run || p->prev_run) {
 102                 printk("task already on run-queue\n");
 103                 return;
 104         }
 105 #endif
 106         if (p->counter > current->counter + 3)
 107                 need_resched = 1;
 108         nr_running++;
 109         (p->next_run = init_task.next_run)->prev_run = p;
 110         p->prev_run = &init_task;
 111         init_task.next_run = p;
 112 }
 113 
 114 static inline void del_from_runqueue(struct task_struct * p)
     /* [previous][next][first][last][top][bottom][index][help] */
 115 {
 116         struct task_struct *next = p->next_run;
 117         struct task_struct *prev = p->prev_run;
 118 
 119 #if 1   /* sanity tests */
 120         if (!next || !prev) {
 121                 printk("task not on run-queue\n");
 122                 return;
 123         }
 124 #endif
 125         if (p == &init_task) {
 126                 static int nr = 0;
 127                 if (nr < 5) {
 128                         nr++;
 129                         printk("idle task may not sleep\n");
 130                 }
 131                 return;
 132         }
 133         nr_running--;
 134         next->prev_run = prev;
 135         prev->next_run = next;
 136         p->next_run = NULL;
 137         p->prev_run = NULL;
 138 }
 139 
 140 /*
 141  * Wake up a process. Put it on the run-queue if it's not
 142  * already there.  The "current" process is always on the
 143  * run-queue (except when the actual re-schedule is in
 144  * progress), and as such you're allowed to do the simpler
 145  * "current->state = TASK_RUNNING" to mark yourself runnable
 146  * without the overhead of this.
 147  */
 148 inline void wake_up_process(struct task_struct * p)
     /* [previous][next][first][last][top][bottom][index][help] */
 149 {
 150         unsigned long flags;
 151 
 152         save_flags(flags);
 153         cli();
 154         p->state = TASK_RUNNING;
 155         if (!p->next_run)
 156                 add_to_runqueue(p);
 157         restore_flags(flags);
 158 }
 159 
 160 static void process_timeout(unsigned long __data)
     /* [previous][next][first][last][top][bottom][index][help] */
 161 {
 162         struct task_struct * p = (struct task_struct *) __data;
 163 
 164         p->timeout = 0;
 165         wake_up_process(p);
 166 }
 167 
 168 /*
 169  * This is the function that decides how desirable a process is.
 170  * You can weigh different processes against each other depending
 171  * on what CPU they've run on lately etc to try to handle cache
 172  * and TLB miss penalties.
 173  *
 174  * Return values:
 175  *       -1000: never select this
 176  *           0: out of time, recalculate counters (but it might still be
 177  *              selected)
 178  *         +ve: "goodness" value (the larger, the better)
 179  *       +1000: realtime process, select this.
 180  */
 181 static inline int goodness(struct task_struct * p, int this_cpu)
     /* [previous][next][first][last][top][bottom][index][help] */
 182 {
 183         int weight;
 184 
 185 #ifdef __SMP__  
 186         /* We are not permitted to run a task someone else is running */
 187         if (p->processor != NO_PROC_ID)
 188                 return -1000;
 189 #endif
 190 
 191         /*
 192          * Give the process a first-approximation goodness value
 193          * according to the number of clock-ticks it has left.
 194          *
 195          * Don't do any other calculations if the time slice is
 196          * over..
 197          */
 198         weight = p->counter;
 199         if (weight) {
 200 
 201 #ifdef __SMP__
 202                 /* Give a largish advantage to the same processor...   */
 203                 /* (this is equivalent to penalizing other processors) */
 204                 if (p->last_processor == this_cpu)
 205                         weight += PROC_CHANGE_PENALTY;
 206 #endif
 207 
 208                 /* .. and a slight advantage to the current process */
 209                 if (p == current)
 210                         weight += 1;
 211         }
 212 
 213         return weight;
 214 }
 215 
 216 /*
 217  *  'schedule()' is the scheduler function. It's a very simple and nice
 218  * scheduler: it's not perfect, but certainly works for most things.
 219  *
 220  * The goto is "interesting".
 221  *
 222  *   NOTE!!  Task 0 is the 'idle' task, which gets called when no other
 223  * tasks can run. It can not be killed, and it cannot sleep. The 'state'
 224  * information in task[0] is never used.
 225  */
 226 asmlinkage void schedule(void)
     /* [previous][next][first][last][top][bottom][index][help] */
 227 {
 228         int c;
 229         struct task_struct * p;
 230         struct task_struct * next;
 231         unsigned long timeout = 0;
 232         int this_cpu=smp_processor_id();
 233 
 234 /* check alarm, wake up any interruptible tasks that have got a signal */
 235 
 236         if (intr_count) {
 237                 printk("Aiee: scheduling in interrupt\n");
 238                 return;
 239         }
 240         run_task_queue(&tq_scheduler);
 241 
 242         need_resched = 0;
 243         cli();
 244         switch (current->state) {
 245                 case TASK_INTERRUPTIBLE:
 246                         if (current->signal & ~current->blocked)
 247                                 goto makerunnable;
 248                         timeout = current->timeout;
 249                         if (timeout && (timeout <= jiffies)) {
 250                                 current->timeout = 0;
 251                                 timeout = 0;
 252                 makerunnable:
 253                                 current->state = TASK_RUNNING;
 254                                 break;
 255                         }
 256                 default:
 257                         del_from_runqueue(current);
 258                 case TASK_RUNNING:
 259         }
 260         p = init_task.next_run;
 261         sti();
 262         
 263 #ifdef __SMP__
 264         /*
 265          *      This is safe as we do not permit re-entry of schedule()
 266          */
 267         current->processor = NO_PROC_ID;        
 268 #endif  
 269 
 270 /*
 271  * Note! there may appear new tasks on the run-queue during this, as
 272  * interrupts are enabled. However, they will be put on front of the
 273  * list, so our list starting at "p" is essentially fixed.
 274  */
 275 /* this is the scheduler proper: */
 276         c = -1000;
 277         next = &init_task;
 278         while (p != &init_task) {
 279                 int weight = goodness(p, this_cpu);
 280                 if (weight > c)
 281                         c = weight, next = p;
 282                 p = p->next_run;
 283         }
 284 
 285         /* if all runnable processes have "counter == 0", re-calculate counters */
 286         if (!c) {
 287                 for_each_task(p)
 288                         p->counter = (p->counter >> 1) + p->priority;
 289         }
 290 #ifdef __SMP__  
 291         
 292         /*
 293          *      Context switching between two idle threads is pointless.
 294          */
 295         if(!current->pid && !next->pid)
 296                 next=current;
 297         /*
 298          *      Allocate process to CPU
 299          */
 300          
 301          next->processor = this_cpu;
 302          next->last_processor = this_cpu;
 303          
 304 #endif   
 305         if (current != next) {
 306                 struct timer_list timer;
 307 
 308                 kstat.context_swtch++;
 309                 if (timeout) {
 310                         init_timer(&timer);
 311                         timer.expires = timeout;
 312                         timer.data = (unsigned long) current;
 313                         timer.function = process_timeout;
 314                         add_timer(&timer);
 315                 }
 316                 switch_to(next);
 317                 if (timeout)
 318                         del_timer(&timer);
 319         }
 320 }
 321 
 322 asmlinkage int sys_pause(void)
     /* [previous][next][first][last][top][bottom][index][help] */
 323 {
 324         current->state = TASK_INTERRUPTIBLE;
 325         schedule();
 326         return -ERESTARTNOHAND;
 327 }
 328 
 329 /*
 330  * wake_up doesn't wake up stopped processes - they have to be awakened
 331  * with signals or similar.
 332  *
 333  * Note that this doesn't need cli-sti pairs: interrupts may not change
 334  * the wait-queue structures directly, but only call wake_up() to wake
 335  * a process. The process itself must remove the queue once it has woken.
 336  */
 337 void wake_up(struct wait_queue **q)
     /* [previous][next][first][last][top][bottom][index][help] */
 338 {
 339         struct wait_queue *tmp;
 340         struct task_struct * p;
 341 
 342         if (!q || !(tmp = *q))
 343                 return;
 344         do {
 345                 if ((p = tmp->task) != NULL) {
 346                         if ((p->state == TASK_UNINTERRUPTIBLE) ||
 347                             (p->state == TASK_INTERRUPTIBLE))
 348                                 wake_up_process(p);
 349                 }
 350                 if (!tmp->next) {
 351                         printk("wait_queue is bad (eip = %p)\n",
 352                                 __builtin_return_address(0));
 353                         printk("        q = %p\n",q);
 354                         printk("       *q = %p\n",*q);
 355                         printk("      tmp = %p\n",tmp);
 356                         break;
 357                 }
 358                 tmp = tmp->next;
 359         } while (tmp != *q);
 360 }
 361 
 362 void wake_up_interruptible(struct wait_queue **q)
     /* [previous][next][first][last][top][bottom][index][help] */
 363 {
 364         struct wait_queue *tmp;
 365         struct task_struct * p;
 366 
 367         if (!q || !(tmp = *q))
 368                 return;
 369         do {
 370                 if ((p = tmp->task) != NULL) {
 371                         if (p->state == TASK_INTERRUPTIBLE)
 372                                 wake_up_process(p);
 373                 }
 374                 if (!tmp->next) {
 375                         printk("wait_queue is bad (eip = %p)\n",
 376                                 __builtin_return_address(0));
 377                         printk("        q = %p\n",q);
 378                         printk("       *q = %p\n",*q);
 379                         printk("      tmp = %p\n",tmp);
 380                         break;
 381                 }
 382                 tmp = tmp->next;
 383         } while (tmp != *q);
 384 }
 385 
 386 void __down(struct semaphore * sem)
     /* [previous][next][first][last][top][bottom][index][help] */
 387 {
 388         struct wait_queue wait = { current, NULL };
 389         add_wait_queue(&sem->wait, &wait);
 390         current->state = TASK_UNINTERRUPTIBLE;
 391         while (sem->count <= 0) {
 392                 schedule();
 393                 current->state = TASK_UNINTERRUPTIBLE;
 394         }
 395         current->state = TASK_RUNNING;
 396         remove_wait_queue(&sem->wait, &wait);
 397 }
 398 
 399 static inline void __sleep_on(struct wait_queue **p, int state)
     /* [previous][next][first][last][top][bottom][index][help] */
 400 {
 401         unsigned long flags;
 402         struct wait_queue wait = { current, NULL };
 403 
 404         if (!p)
 405                 return;
 406         if (current == task[0])
 407                 panic("task[0] trying to sleep");
 408         current->state = state;
 409         add_wait_queue(p, &wait);
 410         save_flags(flags);
 411         sti();
 412         schedule();
 413         remove_wait_queue(p, &wait);
 414         restore_flags(flags);
 415 }
 416 
 417 void interruptible_sleep_on(struct wait_queue **p)
     /* [previous][next][first][last][top][bottom][index][help] */
 418 {
 419         __sleep_on(p,TASK_INTERRUPTIBLE);
 420 }
 421 
 422 void sleep_on(struct wait_queue **p)
     /* [previous][next][first][last][top][bottom][index][help] */
 423 {
 424         __sleep_on(p,TASK_UNINTERRUPTIBLE);
 425 }
 426 
 427 /*
 428  * The head for the timer-list has an "expires" field of MAX_UINT,
 429  * and the sorting routine counts on this..
 430  */
 431 static struct timer_list timer_head = { &timer_head, &timer_head, ~0, 0, NULL };
     /* Non-zero: add_timer() rejects timers whose links are already set, and del_timer() searches the whole list. */
 432 #define SLOW_BUT_DEBUGGING_TIMERS 1
 433 
 434 void add_timer(struct timer_list * timer)
     /* [previous][next][first][last][top][bottom][index][help] */
 435 {
 436         unsigned long flags;
 437         struct timer_list *p;
 438 
 439 #if SLOW_BUT_DEBUGGING_TIMERS
 440         if (timer->next || timer->prev) {
 441                 printk("add_timer() called with non-zero list from %p\n",
 442                         __builtin_return_address(0));
 443                 return;
 444         }
 445 #endif
 446         p = &timer_head;
 447         save_flags(flags);
 448         cli();
 449         do {
 450                 p = p->next;
 451         } while (timer->expires > p->expires);
 452         timer->next = p;
 453         timer->prev = p->prev;
 454         p->prev = timer;
 455         timer->prev->next = timer;
 456         restore_flags(flags);
 457 }
 458 
 459 int del_timer(struct timer_list * timer)
     /* [previous][next][first][last][top][bottom][index][help] */
 460 {
 461         unsigned long flags;
 462 #if SLOW_BUT_DEBUGGING_TIMERS
 463         struct timer_list * p;
 464 
 465         p = &timer_head;
 466         save_flags(flags);
 467         cli();
 468         while ((p = p->next) != &timer_head) {
 469                 if (p == timer) {
 470                         timer->next->prev = timer->prev;
 471                         timer->prev->next = timer->next;
 472                         timer->next = timer->prev = NULL;
 473                         restore_flags(flags);
 474                         return 1;
 475                 }
 476         }
 477         if (timer->next || timer->prev)
 478                 printk("del_timer() called from %p with timer not initialized\n",
 479                         __builtin_return_address(0));
 480         restore_flags(flags);
 481         return 0;
 482 #else   
 483         save_flags(flags);
 484         cli();
 485         if (timer->next) {
 486                 timer->next->prev = timer->prev;
 487                 timer->prev->next = timer->next;
 488                 timer->next = timer->prev = NULL;
 489                 restore_flags(flags);
 490                 return 1;
 491         }
 492         restore_flags(flags);
 493         return 0;
 494 #endif
 495 }
 496 
 /* Bit n of timer_active marks timer_table[n] as armed; timer_bh() clears the bit before calling the slot's fn(). */
 497 unsigned long timer_active = 0;
 498 struct timer_struct timer_table[32];
 499 
 500 /*
 501  * Hmm.. Changed this, as the GNU make sources (load.c) seems to
 502  * imply that avenrun[] is the standard name for this kind of thing.
 503  * Nothing else seems to be standardized: the fractional size etc
 504  * all seem to differ on different machines.
 505  */
     /* Updated by calc_load() using EXP_1, EXP_5 and EXP_15 for elements 0, 1 and 2. */
 506 unsigned long avenrun[3] = { 0,0,0 };
 507 
 508 /*
 509  * Nr of active tasks - counted in fixed-point numbers
 510  */
 511 static unsigned long count_active_tasks(void)
     /* [previous][next][first][last][top][bottom][index][help] */
 512 {
 513         struct task_struct **p;
 514         unsigned long nr = 0;
 515 
 516         for(p = &LAST_TASK; p > &FIRST_TASK; --p)
 517                 if (*p && ((*p)->state == TASK_RUNNING ||
 518                            (*p)->state == TASK_UNINTERRUPTIBLE ||
 519                            (*p)->state == TASK_SWAPPING))
 520                         nr += FIXED_1;
 521 #ifdef __SMP__
 522         nr-=(smp_num_cpus-1)*FIXED_1;
 523 #endif                  
 524         return nr;
 525 }
 526 
 527 static inline void calc_load(void)
     /* [previous][next][first][last][top][bottom][index][help] */
 528 {
 529         unsigned long active_tasks; /* fixed-point */
 530         static int count = LOAD_FREQ;
 531 
 532         if (count-- > 0)
 533                 return;
 534         count = LOAD_FREQ;
 535         active_tasks = count_active_tasks();
 536         CALC_LOAD(avenrun[0], EXP_1, active_tasks);
 537         CALC_LOAD(avenrun[1], EXP_5, active_tasks);
 538         CALC_LOAD(avenrun[2], EXP_15, active_tasks);
 539 }
 540 
 541 /*
 542  * this routine handles the overflow of the microsecond field
 543  *
 544  * The tricky bits of code to handle the accurate clock support
 545  * were provided by Dave Mills (Mills@UDEL.EDU) of NTP fame.
 546  * They were originally developed for SUN and DEC kernels.
 547  * All the kudos should go to Dave for this stuff.
 548  *
 549  */
 550 static void second_overflow(void)
 551 {
 552     long ltemp;
 553 
 554     /* Bump the maxerror field; the value saturates at 0x70000000 */
 555     time_maxerror = (0x70000000-time_maxerror <
 556                      time_tolerance >> SHIFT_USEC) ?
 557         0x70000000 : (time_maxerror + (time_tolerance >> SHIFT_USEC));
 558 
 559     /*
 560      * Leap second processing. If in leap-insert state at
 561      * the end of the day, the system clock is set back one
 562      * second; if in leap-delete state, the system clock is
 563      * set ahead one second. The microtime() routine or
 564      * external clock driver will insure that reported time
 565      * is always monotonic. The ugly divides should be
 566      * replaced.
 567      */
         /* States without a case below (such as the initial TIME_BAD) are left unchanged. */
 568     switch (time_state) {
 569 
 570     case TIME_OK:
 571         if (time_status & STA_INS)
 572             time_state = TIME_INS;
 573         else if (time_status & STA_DEL)
 574             time_state = TIME_DEL;
 575         break;
 576 
 577     case TIME_INS:
 578         if (xtime.tv_sec % 86400 == 0) {
 579             xtime.tv_sec--;
 580             time_state = TIME_OOP;
 581             printk("Clock: inserting leap second 23:59:60 UTC\n");
 582         }
 583         break;
 584 
 585     case TIME_DEL:
 586         if ((xtime.tv_sec + 1) % 86400 == 0) {
 587             xtime.tv_sec++;
 588             time_state = TIME_WAIT;
 589             printk("Clock: deleting leap second 23:59:59 UTC\n");
 590         }
 591         break;
 592 
 593     case TIME_OOP:
 594 
 595         time_state = TIME_WAIT;
 596         break;
 597 
 598     case TIME_WAIT:
             /* Stay here until both leap request bits have been cleared. */
 599         if (!(time_status & (STA_INS | STA_DEL)))
 600             time_state = TIME_OK;
 601     }
 602 
 603     /*
 604      * Compute the phase adjustment for the next second. In
 605      * PLL mode, the offset is reduced by a fixed factor
 606      * times the time constant. In FLL mode the offset is
 607      * used directly. In either mode, the maximum phase
 608      * adjustment for each second is clamped so as to spread
 609      * the adjustment over not more than the number of
 610      * seconds between updates.
 611      */
         /*
          * Both branches work on the magnitude of the offset in ltemp and
          * re-apply the sign afterwards.  Note that "-ltemp << n" parses as
          * "(-ltemp) << n".
          */
 612     if (time_offset < 0) {
 613         ltemp = -time_offset;
 614         if (!(time_status & STA_FLL))
 615             ltemp >>= SHIFT_KG + time_constant;
 616         if (ltemp > (MAXPHASE / MINSEC) << SHIFT_UPDATE)
 617             ltemp = (MAXPHASE / MINSEC) <<
 618                 SHIFT_UPDATE;
 619         time_offset += ltemp;
 620         time_adj = -ltemp << (SHIFT_SCALE - SHIFT_HZ -
 621                               SHIFT_UPDATE);
 622     } else {
 623         ltemp = time_offset;
 624         if (!(time_status & STA_FLL))
 625             ltemp >>= SHIFT_KG + time_constant;
 626         if (ltemp > (MAXPHASE / MINSEC) << SHIFT_UPDATE)
 627             ltemp = (MAXPHASE / MINSEC) <<
 628                 SHIFT_UPDATE;
 629         time_offset -= ltemp;
 630         time_adj = ltemp << (SHIFT_SCALE - SHIFT_HZ -
 631                              SHIFT_UPDATE);
 632     }
 633 
 634     /*
 635      * Compute the frequency estimate and additional phase
 636      * adjustment due to frequency error for the next
 637      * second. When the PPS signal is engaged, gnaw on the
 638      * watchdog counter and update the frequency computed by
 639      * the pll and the PPS signal.
 640      */
         /* The pps_* variables are not defined in this part of the file. */
 641     pps_valid++;
 642     if (pps_valid == PPS_VALID) {
 643         pps_jitter = MAXTIME;
 644         pps_stabil = MAXFREQ;
 645         time_status &= ~(STA_PPSSIGNAL | STA_PPSJITTER |
 646                          STA_PPSWANDER | STA_PPSERROR);
 647     }
         /* Negative values are shifted as magnitudes, so the result rounds toward zero. */
 648     ltemp = time_freq + pps_freq;
 649     if (ltemp < 0)
 650         time_adj -= -ltemp >>
 651             (SHIFT_USEC + SHIFT_HZ - SHIFT_SCALE);
 652     else
 653         time_adj += ltemp >>
 654             (SHIFT_USEC + SHIFT_HZ - SHIFT_SCALE);
 655 
 656 #if HZ == 100
 657     /* compensate for (HZ==100) != 128. Add 25% to get 125; => only 3% error */
 658     if (time_adj < 0)
 659         time_adj -= -time_adj >> 2;
 660     else
 661         time_adj += time_adj >> 2;
 662 #endif
 663 }
 664 
 665 /*
 666  * disregard lost ticks for now.. We don't care enough.
 667  */
 668 static void timer_bh(void * unused)
     /* [previous][next][first][last][top][bottom][index][help] */
 669 {
 670         unsigned long mask;
 671         struct timer_struct *tp;
 672         struct timer_list * timer;
 673 
 674         cli();
 675         while ((timer = timer_head.next) != &timer_head && timer->expires <= jiffies) {
 676                 void (*fn)(unsigned long) = timer->function;
 677                 unsigned long data = timer->data;
 678                 timer->next->prev = timer->prev;
 679                 timer->prev->next = timer->next;
 680                 timer->next = timer->prev = NULL;
 681                 sti();
 682                 fn(data);
 683                 cli();
 684         }
 685         sti();
 686         
 687         for (mask = 1, tp = timer_table+0 ; mask ; tp++,mask += mask) {
 688                 if (mask > timer_active)
 689                         break;
 690                 if (!(mask & timer_active))
 691                         continue;
 692                 if (tp->expires > jiffies)
 693                         continue;
 694                 timer_active &= ~mask;
 695                 tp->fn();
 696                 sti();
 697         }
 698 }
 699 
 700 void tqueue_bh(void * unused)
     /* [previous][next][first][last][top][bottom][index][help] */
 701 {
 702         run_task_queue(&tq_timer);
 703 }
 704 
 705 void immediate_bh(void * unused)
     /* [previous][next][first][last][top][bottom][index][help] */
 706 {
 707         run_task_queue(&tq_immediate);
 708 }
 709 
 710 void do_timer(struct pt_regs * regs)
 711 {
         /*
          * Per-tick work: advance xtime and jiffies, charge the tick to the
          * current task, check its CPU limits and interval timers, and mark
          * the bottom halves that have work waiting.
          */
 712         unsigned long mask;
 713         struct timer_struct *tp;
 714         long ltemp, psecs;
 715 
 716         /* Advance the phase, once it gets to one microsecond, then
 717          * advance the tick more.
 718          */
 719         time_phase += time_adj;
 720         if (time_phase <= -FINEUSEC) {
 721                 ltemp = -time_phase >> SHIFT_SCALE;
 722                 time_phase += ltemp << SHIFT_SCALE;
 723                 xtime.tv_usec += tick + time_adjust_step - ltemp;
 724         }
 725         else if (time_phase >= FINEUSEC) {
 726                 ltemp = time_phase >> SHIFT_SCALE;
 727                 time_phase -= ltemp << SHIFT_SCALE;
 728                 xtime.tv_usec += tick + time_adjust_step + ltemp;
 729         } else
 730                 xtime.tv_usec += tick + time_adjust_step;
 731 
 732         if (time_adjust) {
 733             /* We are doing an adjtime thing. 
 734              *
 735              * Modify the value of the tick for next time.
 736              * Note that a positive delta means we want the clock
 737              * to run fast. This means that the tick should be bigger
 738              *
 739              * Limit the amount of the step for *next* tick to be
 740              * in the range -tickadj .. +tickadj
 741              */
 742              if (time_adjust > tickadj)
 743                time_adjust_step = tickadj;
 744              else if (time_adjust < -tickadj)
 745                time_adjust_step = -tickadj;
 746              else
 747                time_adjust_step = time_adjust;
 748              
 749             /* Reduce by this step the amount of time left  */
 750             time_adjust -= time_adjust_step;
 751         }
 752         else
 753             time_adjust_step = 0;
 754 
             /* Carry a full second out of the microsecond field. */
 755         if (xtime.tv_usec >= 1000000) {
 756             xtime.tv_usec -= 1000000;
 757             xtime.tv_sec++;
 758             second_overflow();
 759         }
 760 
 761         jiffies++;
 762         calc_load();
             /* Charge the tick as user or system time; pid 0 is left out of the kstat totals. */
 763         if (user_mode(regs)) {
 764                 current->utime++;
 765                 if (current->pid) {
 766                         if (current->priority < DEF_PRIORITY)
 767                                 kstat.cpu_nice++;
 768                         else
 769                                 kstat.cpu_user++;
 770                 }
 771                 /* Update ITIMER_VIRT for current task if not in a system call */
 772                 if (current->it_virt_value && !(--current->it_virt_value)) {
 773                         current->it_virt_value = current->it_virt_incr;
 774                         send_sig(SIGVTALRM,current,1);
 775                 }
 776         } else {
 777                 current->stime++;
 778                 if(current->pid)
 779                         kstat.cpu_system++;
                     /* Profiling: count the tick in the slot for the interrupted instruction pointer. */
 780                 if (prof_buffer && current->pid) {
 781                         extern int _stext;
 782                         unsigned long ip = instruction_pointer(regs);
 783                         ip -= (unsigned long) &_stext;
 784                         ip >>= prof_shift;
 785                         if (ip < prof_len)
 786                                 prof_buffer[ip]++;
 787                 }
 788         }
 789         /*
 790          * check the cpu time limit on the process.
 791          */
             /* Hard limit reached: SIGKILL.  The soft limit is tested once per full second of CPU time. */
 792         if ((current->rlim[RLIMIT_CPU].rlim_max != RLIM_INFINITY) &&
 793             (((current->stime + current->utime) / HZ) >= current->rlim[RLIMIT_CPU].rlim_max))
 794                 send_sig(SIGKILL, current, 1);
 795         if ((current->rlim[RLIMIT_CPU].rlim_cur != RLIM_INFINITY) &&
 796             (((current->stime + current->utime) % HZ) == 0)) {
 797                 psecs = (current->stime + current->utime) / HZ;
 798                 /* send when equal */
 799                 if (psecs == current->rlim[RLIMIT_CPU].rlim_cur)
 800                         send_sig(SIGXCPU, current, 1);
 801                 /* and every five seconds thereafter. */
 802                 else if ((psecs > current->rlim[RLIMIT_CPU].rlim_cur) &&
 803                         ((psecs - current->rlim[RLIMIT_CPU].rlim_cur) % 5) == 0)
 804                         send_sig(SIGXCPU, current, 1);
 805         }
 806 
             /* Counter would drop below zero: pin it at 0 and ask for a reschedule (never for pid 0). */
 807         if (current->pid && 0 > --current->counter) {
 808                 current->counter = 0;
 809                 need_resched = 1;
 810         }
 811         /* Update ITIMER_PROF for the current task */
 812         if (current->it_prof_value && !(--current->it_prof_value)) {
 813                 current->it_prof_value = current->it_prof_incr;
 814                 send_sig(SIGPROF,current,1);
 815         }
             /* Expired timer_table[] slots only mark TIMER_BH here; timer_bh() calls their handlers. */
 816         for (mask = 1, tp = timer_table+0 ; mask ; tp++,mask += mask) {
 817                 if (mask > timer_active)
 818                         break;
 819                 if (!(mask & timer_active))
 820                         continue;
 821                 if (tp->expires > jiffies)
 822                         continue;
 823                 mark_bh(TIMER_BH);
 824         }
             /* Check the timer list head and the tq_timer queue with interrupts off. */
 825         cli();
 826         if (timer_head.next->expires <= jiffies)
 827                 mark_bh(TIMER_BH);
 828         if (tq_timer != &tq_last)
 829                 mark_bh(TQUEUE_BH);
 830         sti();
 831 }
 832 
 833 asmlinkage unsigned int sys_alarm(unsigned int seconds)
     /* [previous][next][first][last][top][bottom][index][help] */
 834 {
 835         struct itimerval it_new, it_old;
 836         unsigned int oldalarm;
 837 
 838         it_new.it_interval.tv_sec = it_new.it_interval.tv_usec = 0;
 839         it_new.it_value.tv_sec = seconds;
 840         it_new.it_value.tv_usec = 0;
 841         _setitimer(ITIMER_REAL, &it_new, &it_old);
 842         oldalarm = it_old.it_value.tv_sec;
 843         /* ehhh.. We can't return 0 if we have an alarm pending.. */
 844         /* And we'd better return too much than too little anyway */
 845         if (it_old.it_value.tv_usec)
 846                 oldalarm++;
 847         return oldalarm;
 848 }
 849 
 850 asmlinkage int sys_getpid(void)
     /* [previous][next][first][last][top][bottom][index][help] */
 851 {
 852         return current->pid;
 853 }
 854 
 855 asmlinkage int sys_getppid(void)
     /* [previous][next][first][last][top][bottom][index][help] */
 856 {
 857         return current->p_opptr->pid;
 858 }
 859 
 860 asmlinkage int sys_getuid(void)
     /* [previous][next][first][last][top][bottom][index][help] */
 861 {
 862         return current->uid;
 863 }
 864 
 865 asmlinkage int sys_geteuid(void)
     /* [previous][next][first][last][top][bottom][index][help] */
 866 {
 867         return current->euid;
 868 }
 869 
 870 asmlinkage int sys_getgid(void)
     /* [previous][next][first][last][top][bottom][index][help] */
 871 {
 872         return current->gid;
 873 }
 874 
 875 asmlinkage int sys_getegid(void)
     /* [previous][next][first][last][top][bottom][index][help] */
 876 {
 877         return current->egid;
 878 }
 879 
 880 asmlinkage int sys_nice(int increment)
     /* [previous][next][first][last][top][bottom][index][help] */
 881 {
 882         unsigned long newprio;
 883         int increase = 0;
 884 
 885         newprio = increment;
 886         if (increment < 0) {
 887                 if (!suser())
 888                         return -EPERM;
 889                 newprio = -increment;
 890                 increase = 1;
 891         }
 892         if (newprio > 40)
 893                 newprio = 40;
 894         /*
 895          * do a "normalization" of the priority (traditionally
 896          * unix nice values are -20..20, linux doesn't really
 897          * use that kind of thing, but uses the length of the
 898          * timeslice instead (default 150 msec). The rounding is
 899          * why we want to avoid negative values.
 900          */
 901         newprio = (newprio * DEF_PRIORITY + 10) / 20;
 902         increment = newprio;
 903         if (increase)
 904                 increment = -increment;
 905         newprio = current->priority - increment;
 906         if (newprio < 1)
 907                 newprio = 1;
 908         if (newprio > DEF_PRIORITY*2)
 909                 newprio = DEF_PRIORITY*2;
 910         current->priority = newprio;
 911         return 0;
 912 }
 913 
 914 static void show_task(int nr,struct task_struct * p)
     /* [previous][next][first][last][top][bottom][index][help] */
 915 {
 916         unsigned long free;
 917         static const char * stat_nam[] = { "R", "S", "D", "Z", "T", "W" };
 918 
 919         printk("%-8s %3d ", p->comm, (p == current) ? -nr : nr);
 920         if (((unsigned) p->state) < sizeof(stat_nam)/sizeof(char *))
 921                 printk(stat_nam[p->state]);
 922         else
 923                 printk(" ");
 924 #if ((~0UL) == 0xffffffff)
 925         if (p == current)
 926                 printk(" current  ");
 927         else
 928                 printk(" %08lX ", thread_saved_pc(&p->tss));
 929 #else
 930         if (p == current)
 931                 printk("   current task   ");
 932         else
 933                 printk(" %016lx ", thread_saved_pc(&p->tss));
 934 #endif
 935         for (free = 1; free < PAGE_SIZE/sizeof(long) ; free++) {
 936                 if (((unsigned long *)p->kernel_stack_page)[free])
 937                         break;
 938         }
 939         printk("%5lu %5d %6d ", free*sizeof(long), p->pid, p->p_pptr->pid);
 940         if (p->p_cptr)
 941                 printk("%5d ", p->p_cptr->pid);
 942         else
 943                 printk("      ");
 944         if (p->p_ysptr)
 945                 printk("%7d", p->p_ysptr->pid);
 946         else
 947                 printk("       ");
 948         if (p->p_osptr)
 949                 printk(" %5d\n", p->p_osptr->pid);
 950         else
 951                 printk("\n");
 952 }
 953 
 954 void show_state(void)
     /* [previous][next][first][last][top][bottom][index][help] */
 955 {
 956         int i;
 957 
 958 #if ((~0UL) == 0xffffffff)
 959         printk("\n"
 960                "                         free                        sibling\n");
 961         printk("  task             PC    stack   pid father child younger older\n");
 962 #else
 963         printk("\n"
 964                "                                 free                        sibling\n");
 965         printk("  task                 PC        stack   pid father child younger older\n");
 966 #endif
 967         for (i=0 ; i<NR_TASKS ; i++)
 968                 if (task[i])
 969                         show_task(i,task[i]);
 970 }
 971 
 972 void sched_init(void)
     /* [previous][next][first][last][top][bottom][index][help] */
 973 {
 974         /*
 975          *      We have to do a little magic to get the first
 976          *      process right in SMP mode.
 977          */
 978         int cpu=smp_processor_id();
 979         current_set[cpu]=&init_task;
 980 #ifdef __SMP__  
 981         init_task.processor=cpu;
 982 #endif
 983         bh_base[TIMER_BH].routine = timer_bh;
 984         bh_base[TQUEUE_BH].routine = tqueue_bh;
 985         bh_base[IMMEDIATE_BH].routine = immediate_bh;
 986         enable_bh(TIMER_BH);
 987         enable_bh(TQUEUE_BH);
 988         enable_bh(IMMEDIATE_BH);
 989 }

/* [previous][next][first][last][top][bottom][index][help] */