root/kernel/sched.c


DEFINITIONS

This source file includes the following definitions:
  1. add_to_runqueue
  2. del_from_runqueue
  3. move_last_runqueue
  4. wake_up_process
  5. process_timeout
  6. goodness
  7. schedule
  8. sys_pause
  9. wake_up
  10. wake_up_interruptible
  11. __down
  12. __sleep_on
  13. interruptible_sleep_on
  14. sleep_on
  15. add_timer
  16. del_timer
  17. count_active_tasks
  18. calc_load
  19. second_overflow
  20. timer_bh
  21. tqueue_bh
  22. immediate_bh
  23. do_timer
  24. sys_alarm
  25. sys_getpid
  26. sys_getppid
  27. sys_getuid
  28. sys_geteuid
  29. sys_getgid
  30. sys_getegid
  31. sys_nice
  32. find_process_by_pid
  33. setscheduler
  34. sys_sched_setscheduler
  35. sys_sched_setparam
  36. sys_sched_getscheduler
  37. sys_sched_getparam
  38. sys_sched_yield
  39. sys_sched_get_priority_max
  40. sys_sched_get_priority_min
  41. sys_sched_rr_get_interval
  42. show_task
  43. show_state
  44. sched_init

   1 /*
   2  *  linux/kernel/sched.c
   3  *
   4  *  Copyright (C) 1991, 1992  Linus Torvalds
   5  */
   6 
   7 /*
   8  * 'sched.c' is the main kernel file. It contains scheduling primitives
   9  * (sleep_on, wakeup, schedule etc.) as well as a number of simple system
  10  * call functions (type getpid(), which just extracts a field from
  11  * current-task).
  12  */
  13 
  14 #include <linux/signal.h>
  15 #include <linux/sched.h>
  16 #include <linux/timer.h>
  17 #include <linux/kernel.h>
  18 #include <linux/kernel_stat.h>
  19 #include <linux/fdreg.h>
  20 #include <linux/errno.h>
  21 #include <linux/time.h>
  22 #include <linux/ptrace.h>
  23 #include <linux/delay.h>
  24 #include <linux/interrupt.h>
  25 #include <linux/tqueue.h>
  26 #include <linux/resource.h>
  27 #include <linux/mm.h>
  28 #include <linux/smp.h>
  29 
  30 #include <asm/system.h>
  31 #include <asm/io.h>
  32 #include <asm/segment.h>
  33 #include <asm/pgtable.h>
  34 #include <asm/mmu_context.h>
  35 
  36 #include <linux/timex.h>
  37 
  38 /*
  39  * kernel variables
  40  */
  41 long tick = 1000000 / HZ;               /* timer interrupt period */
  42 volatile struct timeval xtime;          /* The current time */
  43 int tickadj = 500/HZ;                   /* microsecs */
  44 
  45 DECLARE_TASK_QUEUE(tq_timer);
  46 DECLARE_TASK_QUEUE(tq_immediate);
  47 DECLARE_TASK_QUEUE(tq_scheduler);
  48 
  49 /*
  50  * phase-lock loop variables
  51  */
  52 int time_state = TIME_BAD;     /* clock synchronization status */
  53 int time_status = STA_UNSYNC | STA_PLL; /* clock status bits */
  54 long time_offset = 0;           /* time adjustment (us) */
  55 long time_constant = 2;         /* pll time constant */
  56 long time_tolerance = MAXFREQ;  /* frequency tolerance (ppm) */
  57 long time_precision = 1;        /* clock precision (us) */
  58 long time_maxerror = 0x70000000;/* maximum error */
  59 long time_esterror = 0x70000000;/* estimated error */
  60 long time_phase = 0;            /* phase offset (scaled us) */
  61 long time_freq = 0;             /* frequency offset (scaled ppm) */
  62 long time_adj = 0;              /* tick adjust (scaled 1 / HZ) */
  63 long time_reftime = 0;          /* time at last adjustment (s) */
  64 
  65 long time_adjust = 0;
  66 long time_adjust_step = 0;
  67 
  68 int need_resched = 0;
  69 unsigned long event = 0;
  70 
  71 extern int _setitimer(int, struct itimerval *, struct itimerval *);
  72 unsigned long * prof_buffer = NULL;
  73 unsigned long prof_len = 0;
  74 unsigned long prof_shift = 0;
  75 
  76 #define _S(nr) (1<<((nr)-1))
  77 
  78 extern void mem_use(void);
  79 
  80 static unsigned long init_kernel_stack[1024] = { STACK_MAGIC, };
  81 unsigned long init_user_stack[1024] = { STACK_MAGIC, };
  82 static struct vm_area_struct init_mmap = INIT_MMAP;
  83 static struct fs_struct init_fs = INIT_FS;
  84 static struct files_struct init_files = INIT_FILES;
  85 static struct signal_struct init_signals = INIT_SIGNALS;
  86 
  87 struct mm_struct init_mm = INIT_MM;
  88 struct task_struct init_task = INIT_TASK;
  89 
  90 unsigned long volatile jiffies=0;
  91 
  92 struct task_struct *current_set[NR_CPUS];
  93 struct task_struct *last_task_used_math = NULL;
  94 
  95 struct task_struct * task[NR_TASKS] = {&init_task, };
  96 
  97 struct kernel_stat kstat = { 0 };
  98 
  99 static inline void add_to_runqueue(struct task_struct * p)
 100 {
 101 #if 1   /* sanity tests */
 102         if (p->next_run || p->prev_run) {
 103                 printk("task already on run-queue\n");
 104                 return;
 105         }
 106 #endif
 107         if (p->counter > current->counter + 3)
 108                 need_resched = 1;
 109         nr_running++;
 110         (p->prev_run = init_task.prev_run)->next_run = p;
 111         p->next_run = &init_task;
 112         init_task.prev_run = p;
 113 }
 114 
 115 static inline void del_from_runqueue(struct task_struct * p)
 116 {
 117         struct task_struct *next = p->next_run;
 118         struct task_struct *prev = p->prev_run;
 119 
 120 #if 1   /* sanity tests */
 121         if (!next || !prev) {
 122                 printk("task not on run-queue\n");
 123                 return;
 124         }
 125 #endif
 126         if (p == &init_task) {
 127                 static int nr = 0;
 128                 if (nr < 5) {
 129                         nr++;
 130                         printk("idle task may not sleep\n");
 131                 }
 132                 return;
 133         }
 134         nr_running--;
 135         next->prev_run = prev;
 136         prev->next_run = next;
 137         p->next_run = NULL;
 138         p->prev_run = NULL;
 139 }
 140 
 141 static inline void move_last_runqueue(struct task_struct * p)
 142 {
 143         struct task_struct *next = p->next_run;
 144         struct task_struct *prev = p->prev_run;
 145 
 146         next->prev_run = prev;
 147         prev->next_run = next;
 148         (p->prev_run = init_task.prev_run)->next_run = p;
 149         p->next_run = &init_task;
 150         init_task.prev_run = p;
 151 }
 152 
 153 /*
 154  * Wake up a process. Put it on the run-queue if it's not
 155  * already there.  The "current" process is always on the
 156  * run-queue (except when the actual re-schedule is in
 157  * progress), and as such you're allowed to do the simpler
 158  * "current->state = TASK_RUNNING" to mark yourself runnable
 159  * without the overhead of this.
 160  */
 161 inline void wake_up_process(struct task_struct * p)
 162 {
 163         unsigned long flags;
 164 
 165         save_flags(flags);
 166         cli();
 167         p->state = TASK_RUNNING;
 168         if (!p->next_run)
 169                 add_to_runqueue(p);
 170         restore_flags(flags);
 171 }
 172 
 173 static void process_timeout(unsigned long __data)
 174 {
 175         struct task_struct * p = (struct task_struct *) __data;
 176 
 177         p->timeout = 0;
 178         wake_up_process(p);
 179 }
 180 
 181 /*
 182  * This is the function that decides how desirable a process is..
 183  * You can weigh different processes against each other depending
 184  * on which CPU they've run on lately, etc., to try to handle cache
 185  * and TLB miss penalties.
 186  *
 187  * Return values:
 188  *       -1000: never select this
 189  *           0: out of time, recalculate counters (but it might still be
 190  *              selected)
 191  *         +ve: "goodness" value (the larger, the better)
 192  *       +1000: realtime process, select this.
 193  */
 194 static inline int goodness(struct task_struct * p, int this_cpu)
 195 {
 196         int weight;
 197 
 198 #ifdef __SMP__  
 199         /* We are not permitted to run a task someone else is running */
 200         if (p->processor != NO_PROC_ID)
 201                 return -1000;
 202 #endif
 203 
 204         /*
 205          * Realtime process, select the first one on the
 206          * runqueue (taking priorities within processes
 207          * into account).
 208          */
 209         if (p->policy != SCHED_OTHER)
 210                 return 1000 + p->rt_priority;
 211 
 212         /*
 213          * Give the process a first-approximation goodness value
 214          * according to the number of clock-ticks it has left.
 215          *
 216          * Don't do any other calculations if the time slice is
 217          * over..
 218          */
 219         weight = p->counter;
 220         if (weight) {
 221                         
 222 #ifdef __SMP__
 223                 /* Give a largish advantage to the same processor...   */
 224                 /* (this is equivalent to penalizing other processors) */
 225                 if (p->last_processor == this_cpu)
 226                         weight += PROC_CHANGE_PENALTY;
 227 #endif
 228 
 229                 /* .. and a slight advantage to the current process */
 230                 if (p == current)
 231                         weight += 1;
 232         }
 233 
 234         return weight;
 235 }
 236 
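/*
 * Editor's note (addition, not part of the original file): a worked
 * example of the goodness() return values defined above:
 *
 *	SCHED_FIFO task, rt_priority 50		-> 1000 + 50 = 1050
 *	SCHED_OTHER task, counter 5, other CPU	->    5
 *	SCHED_OTHER task, counter 5, == current	->    5 + 1  =    6
 *	SCHED_OTHER task, counter 0		->    0 (forces recalc)
 *
 * The "+1" for the current process biases the choice towards not
 * switching when two candidates are otherwise equally good.
 */
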
 237 /*
 238  *  'schedule()' is the scheduler function. It's a very simple and nice
 239  * scheduler: it's not perfect, but certainly works for most things.
 240  *
 241  * The goto is "interesting".
 242  *
 243  *   NOTE!!  Task 0 is the 'idle' task, which gets called when no other
 244  * tasks can run. It cannot be killed, and it cannot sleep. The 'state'
 245  * information in task[0] is never used.
 246  */
 247 asmlinkage void schedule(void)
 248 {
 249         int c;
 250         struct task_struct * p;
 251         struct task_struct * next;
 252         unsigned long timeout = 0;
 253         int this_cpu=smp_processor_id();
 254 
 255 /* check alarm, wake up any interruptible tasks that have got a signal */
 256 
 257         if (intr_count) {
 258                 printk("Aiee: scheduling in interrupt\n");
 259                 return;
 260         }
 261         run_task_queue(&tq_scheduler);
 262 
 263         need_resched = 0;
 264         cli();
 265         /* move an exhausted RR process to be last.. */
 266         if (!current->counter && current->policy == SCHED_RR) {
 267                 current->counter = current->priority;
 268                 move_last_runqueue(current);
 269         }
 270         switch (current->state) {
 271                 case TASK_INTERRUPTIBLE:
 272                         if (current->signal & ~current->blocked)
 273                                 goto makerunnable;
 274                         timeout = current->timeout;
 275                         if (timeout && (timeout <= jiffies)) {
 276                                 current->timeout = 0;
 277                                 timeout = 0;
 278                 makerunnable:
 279                                 current->state = TASK_RUNNING;
 280                                 break;
 281                         }
 282                 default:
 283                         del_from_runqueue(current);
 284                 case TASK_RUNNING:
 285         }
 286         p = init_task.next_run;
 287         sti();
 288         
 289 #ifdef __SMP__
 290         /*
 291          *      This is safe as we do not permit re-entry of schedule()
 292          */
 293         current->processor = NO_PROC_ID;        
 294 #endif  
 295 
 296 /*
 297  * Note! New tasks may appear on the run-queue during this, as
 298  * interrupts are enabled. However, they will be put on the front of
 299  * the list, so our list starting at "p" is essentially fixed.
 300  */
 301 /* this is the scheduler proper: */
 302         c = -1000;
 303         next = &init_task;
 304         while (p != &init_task) {
 305                 int weight = goodness(p, this_cpu);
 306                 if (weight > c)
 307                         c = weight, next = p;
 308                 p = p->next_run;
 309         }
 310 
 311         /* if all runnable processes have "counter == 0", re-calculate counters */
 312         if (!c) {
 313                 for_each_task(p)
 314                         p->counter = (p->counter >> 1) + p->priority;
 315         }
 316 #ifdef __SMP__  
 317         
 318         /*
 319          *      Context switching between two idle threads is pointless.
 320          */
 321         if(!current->pid && !next->pid)
 322                 next=current;
 323         /*
 324          *      Allocate process to CPU
 325          */
 326          
 327          next->processor = this_cpu;
 328          next->last_processor = this_cpu;
 329          
 330 #endif   
 331         if (current != next) {
 332                 struct timer_list timer;
 333 
 334                 kstat.context_swtch++;
 335                 if (timeout) {
 336                         init_timer(&timer);
 337                         timer.expires = timeout;
 338                         timer.data = (unsigned long) current;
 339                         timer.function = process_timeout;
 340                         add_timer(&timer);
 341                 }
 342                 get_mmu_context(next);
 343                 switch_to(next);
 344                 if (timeout)
 345                         del_timer(&timer);
 346         }
 347 }
 348 
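/*
 * Editor's note (addition, not part of the original file): the
 * recalculation above, p->counter = (p->counter >> 1) + p->priority,
 * behaves as a geometric series. A task that stays asleep across
 * successive recalculations converges towards
 *
 *	counter -> priority + priority/2 + priority/4 + ... = 2*priority
 *
 * so a long-sleeping (interactive) task can bank at most one extra
 * timeslice of credit over a CPU hog, which is what gives interactive
 * processes their scheduling boost.
 */
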
 349 asmlinkage int sys_pause(void)
 350 {
 351         current->state = TASK_INTERRUPTIBLE;
 352         schedule();
 353         return -ERESTARTNOHAND;
 354 }
 355 
 356 /*
 357  * wake_up doesn't wake up stopped processes - they have to be awakened
 358  * with signals or similar.
 359  *
 360  * Note that this doesn't need cli-sti pairs: interrupts may not change
 361  * the wait-queue structures directly, but only call wake_up() to wake
 362  * a process. The process itself must remove itself from the wait-queue once woken.
 363  */
 364 void wake_up(struct wait_queue **q)
 365 {
 366         struct wait_queue *tmp;
 367         struct task_struct * p;
 368 
 369         if (!q || !(tmp = *q))
 370                 return;
 371         do {
 372                 if ((p = tmp->task) != NULL) {
 373                         if ((p->state == TASK_UNINTERRUPTIBLE) ||
 374                             (p->state == TASK_INTERRUPTIBLE))
 375                                 wake_up_process(p);
 376                 }
 377                 if (!tmp->next) {
 378                         printk("wait_queue is bad (eip = %p)\n",
 379                                 __builtin_return_address(0));
 380                         printk("        q = %p\n",q);
 381                         printk("       *q = %p\n",*q);
 382                         printk("      tmp = %p\n",tmp);
 383                         break;
 384                 }
 385                 tmp = tmp->next;
 386         } while (tmp != *q);
 387 }
 388 
 389 void wake_up_interruptible(struct wait_queue **q)
 390 {
 391         struct wait_queue *tmp;
 392         struct task_struct * p;
 393 
 394         if (!q || !(tmp = *q))
 395                 return;
 396         do {
 397                 if ((p = tmp->task) != NULL) {
 398                         if (p->state == TASK_INTERRUPTIBLE)
 399                                 wake_up_process(p);
 400                 }
 401                 if (!tmp->next) {
 402                         printk("wait_queue is bad (eip = %p)\n",
 403                                 __builtin_return_address(0));
 404                         printk("        q = %p\n",q);
 405                         printk("       *q = %p\n",*q);
 406                         printk("      tmp = %p\n",tmp);
 407                         break;
 408                 }
 409                 tmp = tmp->next;
 410         } while (tmp != *q);
 411 }
 412 
 413 void __down(struct semaphore * sem)
 414 {
 415         struct wait_queue wait = { current, NULL };
 416         add_wait_queue(&sem->wait, &wait);
 417         current->state = TASK_UNINTERRUPTIBLE;
 418         while (sem->count <= 0) {
 419                 schedule();
 420                 current->state = TASK_UNINTERRUPTIBLE;
 421         }
 422         current->state = TASK_RUNNING;
 423         remove_wait_queue(&sem->wait, &wait);
 424 }
 425 
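/*
 * Editor's sketch (addition, not part of the original file): __down()
 * is only the contended slow path. Callers normally use the down()/up()
 * inlines from <asm/semaphore.h>, which fall through to __down() when
 * the count indicates contention. Assuming the MUTEX initializer of
 * this kernel generation, typical usage looks like:
 *
 *	static struct semaphore my_sem = MUTEX;	(count starts at 1)
 *
 *	down(&my_sem);		(sleeps uninterruptibly until acquired)
 *	... critical section ...
 *	up(&my_sem);		(releases, waking a sleeper if any)
 */
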
 426 static inline void __sleep_on(struct wait_queue **p, int state)
 427 {
 428         unsigned long flags;
 429         struct wait_queue wait = { current, NULL };
 430 
 431         if (!p)
 432                 return;
 433         if (current == task[0])
 434                 panic("task[0] trying to sleep");
 435         current->state = state;
 436         add_wait_queue(p, &wait);
 437         save_flags(flags);
 438         sti();
 439         schedule();
 440         remove_wait_queue(p, &wait);
 441         restore_flags(flags);
 442 }
 443 
 444 void interruptible_sleep_on(struct wait_queue **p)
 445 {
 446         __sleep_on(p,TASK_INTERRUPTIBLE);
 447 }
 448 
 449 void sleep_on(struct wait_queue **p)
 450 {
 451         __sleep_on(p,TASK_UNINTERRUPTIBLE);
 452 }
 453 
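/*
 * Editor's sketch (addition, not part of the original file): the usual
 * driver-side pairing of the sleep/wake primitives above, using a
 * hypothetical device flag. The condition is re-tested in a loop
 * because wake_up() wakes every sleeper on the queue:
 *
 *	static struct wait_queue * dev_wait = NULL;
 *	static volatile int dev_ready = 0;
 *
 *	int dev_wait_for_data(void)
 *	{
 *		while (!dev_ready) {
 *			interruptible_sleep_on(&dev_wait);
 *			if (current->signal & ~current->blocked)
 *				return -ERESTARTSYS;	(hit by a signal)
 *		}
 *		dev_ready = 0;
 *		return 0;
 *	}
 *
 *	and from the interrupt handler:
 *
 *	dev_ready = 1;
 *	wake_up_interruptible(&dev_wait);
 */
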
 454 /*
 455  * The head for the timer-list has an "expires" field of MAX_UINT,
 456  * and the sorting routine counts on this..
 457  */
 458 static struct timer_list timer_head = { &timer_head, &timer_head, ~0, 0, NULL };
 459 #define SLOW_BUT_DEBUGGING_TIMERS 1
 460 
 461 void add_timer(struct timer_list * timer)
 462 {
 463         unsigned long flags;
 464         struct timer_list *p;
 465 
 466 #if SLOW_BUT_DEBUGGING_TIMERS
 467         if (timer->next || timer->prev) {
 468                 printk("add_timer() called with non-zero list from %p\n",
 469                         __builtin_return_address(0));
 470                 return;
 471         }
 472 #endif
 473         p = &timer_head;
 474         save_flags(flags);
 475         cli();
 476         do {
 477                 p = p->next;
 478         } while (timer->expires > p->expires);
 479         timer->next = p;
 480         timer->prev = p->prev;
 481         p->prev = timer;
 482         timer->prev->next = timer;
 483         restore_flags(flags);
 484 }
 485 
 486 int del_timer(struct timer_list * timer)
 487 {
 488         unsigned long flags;
 489 #if SLOW_BUT_DEBUGGING_TIMERS
 490         struct timer_list * p;
 491 
 492         p = &timer_head;
 493         save_flags(flags);
 494         cli();
 495         while ((p = p->next) != &timer_head) {
 496                 if (p == timer) {
 497                         timer->next->prev = timer->prev;
 498                         timer->prev->next = timer->next;
 499                         timer->next = timer->prev = NULL;
 500                         restore_flags(flags);
 501                         return 1;
 502                 }
 503         }
 504         if (timer->next || timer->prev)
 505                 printk("del_timer() called from %p with timer not initialized\n",
 506                         __builtin_return_address(0));
 507         restore_flags(flags);
 508         return 0;
 509 #else   
 510         save_flags(flags);
 511         cli();
 512         if (timer->next) {
 513                 timer->next->prev = timer->prev;
 514                 timer->prev->next = timer->next;
 515                 timer->next = timer->prev = NULL;
 516                 restore_flags(flags);
 517                 return 1;
 518         }
 519         restore_flags(flags);
 520         return 0;
 521 #endif
 522 }
 523 
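/*
 * Editor's sketch (addition, not part of the original file): typical
 * use of the add_timer()/del_timer() interface above, with a
 * hypothetical callback. Note that "expires" is an absolute jiffies
 * value, and the links must be NULL before add_timer():
 *
 *	static void my_timeout(unsigned long data)
 *	{
 *		printk("timed out, cookie=%lu\n", data);
 *	}
 *
 *	static struct timer_list my_timer;
 *
 *	init_timer(&my_timer);			(zeroes next/prev)
 *	my_timer.expires = jiffies + 2*HZ;	(two seconds from now)
 *	my_timer.data = 42;
 *	my_timer.function = my_timeout;
 *	add_timer(&my_timer);
 *
 *	del_timer(&my_timer);	(returns 1 if it was still pending)
 */
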
 524 unsigned long timer_active = 0;
 525 struct timer_struct timer_table[32];
 526 
 527 /*
 528  * Hmm.. Changed this, as the GNU make sources (load.c) seem to
 529  * imply that avenrun[] is the standard name for this kind of thing.
 530  * Nothing else seems to be standardized: the fractional size etc
 531  * all seem to differ on different machines.
 532  */
 533 unsigned long avenrun[3] = { 0,0,0 };
 534 
 535 /*
 536  * Nr of active tasks - counted in fixed-point numbers
 537  */
 538 static unsigned long count_active_tasks(void)
 539 {
 540         struct task_struct **p;
 541         unsigned long nr = 0;
 542 
 543         for(p = &LAST_TASK; p > &FIRST_TASK; --p)
 544                 if (*p && ((*p)->state == TASK_RUNNING ||
 545                            (*p)->state == TASK_UNINTERRUPTIBLE ||
 546                            (*p)->state == TASK_SWAPPING))
 547                         nr += FIXED_1;
 548 #ifdef __SMP__
 549         nr-=(smp_num_cpus-1)*FIXED_1;
 550 #endif                  
 551         return nr;
 552 }
 553 
 554 static inline void calc_load(void)
 555 {
 556         unsigned long active_tasks; /* fixed-point */
 557         static int count = LOAD_FREQ;
 558 
 559         if (count-- > 0)
 560                 return;
 561         count = LOAD_FREQ;
 562         active_tasks = count_active_tasks();
 563         CALC_LOAD(avenrun[0], EXP_1, active_tasks);
 564         CALC_LOAD(avenrun[1], EXP_5, active_tasks);
 565         CALC_LOAD(avenrun[2], EXP_15, active_tasks);
 566 }
 567 
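/*
 * Editor's note (addition, not part of the original file): CALC_LOAD
 * computes an exponentially-weighted moving average in fixed point.
 * Assuming the constants from <linux/sched.h> of this generation
 * (FSHIFT = 11, FIXED_1 = 2048, EXP_1 = 1884), one update step is
 *
 *	load = (load*EXP_1 + active*(FIXED_1 - EXP_1)) >> FSHIFT
 *
 * e.g. starting from load = 0 with one active task (active = 2048):
 *
 *	step 1: (0*1884   + 2048*164) >> 11 = 164	(~0.08)
 *	step 2: (164*1884 + 2048*164) >> 11 = 314	(~0.15)
 *
 * converging towards 2048 (= 1.00) over many LOAD_FREQ intervals.
 */
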
 568 /*
 569  * this routine handles the overflow of the microsecond field
 570  *
 571  * The tricky bits of code to handle the accurate clock support
 572  * were provided by Dave Mills (Mills@UDEL.EDU) of NTP fame.
 573  * They were originally developed for SUN and DEC kernels.
 574  * All the kudos should go to Dave for this stuff.
 575  *
 576  */
 577 static void second_overflow(void)
 578 {
 579     long ltemp;
 580 
 581     /* Bump the maxerror field */
 582     time_maxerror = (0x70000000-time_maxerror <
 583                      time_tolerance >> SHIFT_USEC) ?
 584         0x70000000 : (time_maxerror + (time_tolerance >> SHIFT_USEC));
 585 
 586     /*
 587      * Leap second processing. If in leap-insert state at
 588      * the end of the day, the system clock is set back one
 589      * second; if in leap-delete state, the system clock is
 590      * set ahead one second. The microtime() routine or
 591      * external clock driver will ensure that reported time
 592      * is always monotonic. The ugly divides should be
 593      * replaced.
 594      */
 595     switch (time_state) {
 596 
 597     case TIME_OK:
 598         if (time_status & STA_INS)
 599             time_state = TIME_INS;
 600         else if (time_status & STA_DEL)
 601             time_state = TIME_DEL;
 602         break;
 603 
 604     case TIME_INS:
 605         if (xtime.tv_sec % 86400 == 0) {
 606             xtime.tv_sec--;
 607             time_state = TIME_OOP;
 608             printk("Clock: inserting leap second 23:59:60 UTC\n");
 609         }
 610         break;
 611 
 612     case TIME_DEL:
 613         if ((xtime.tv_sec + 1) % 86400 == 0) {
 614             xtime.tv_sec++;
 615             time_state = TIME_WAIT;
 616             printk("Clock: deleting leap second 23:59:59 UTC\n");
 617         }
 618         break;
 619 
 620     case TIME_OOP:
 621 
 622         time_state = TIME_WAIT;
 623         break;
 624 
 625     case TIME_WAIT:
 626         if (!(time_status & (STA_INS | STA_DEL)))
 627             time_state = TIME_OK;
 628     }
 629 
 630     /*
 631      * Compute the phase adjustment for the next second. In
 632      * PLL mode, the offset is reduced by a fixed factor
 633      * times the time constant. In FLL mode the offset is
 634      * used directly. In either mode, the maximum phase
 635      * adjustment for each second is clamped so as to spread
 636      * the adjustment over not more than the number of
 637      * seconds between updates.
 638      */
 639     if (time_offset < 0) {
 640         ltemp = -time_offset;
 641         if (!(time_status & STA_FLL))
 642             ltemp >>= SHIFT_KG + time_constant;
 643         if (ltemp > (MAXPHASE / MINSEC) << SHIFT_UPDATE)
 644             ltemp = (MAXPHASE / MINSEC) <<
 645                 SHIFT_UPDATE;
 646         time_offset += ltemp;
 647         time_adj = -ltemp << (SHIFT_SCALE - SHIFT_HZ -
 648                               SHIFT_UPDATE);
 649     } else {
 650         ltemp = time_offset;
 651         if (!(time_status & STA_FLL))
 652             ltemp >>= SHIFT_KG + time_constant;
 653         if (ltemp > (MAXPHASE / MINSEC) << SHIFT_UPDATE)
 654             ltemp = (MAXPHASE / MINSEC) <<
 655                 SHIFT_UPDATE;
 656         time_offset -= ltemp;
 657         time_adj = ltemp << (SHIFT_SCALE - SHIFT_HZ -
 658                              SHIFT_UPDATE);
 659     }
 660 
 661     /*
 662      * Compute the frequency estimate and additional phase
 663      * adjustment due to frequency error for the next
 664      * second. When the PPS signal is engaged, gnaw on the
 665      * watchdog counter and update the frequency computed by
 666      * the pll and the PPS signal.
 667      */
 668     pps_valid++;
 669     if (pps_valid == PPS_VALID) {
 670         pps_jitter = MAXTIME;
 671         pps_stabil = MAXFREQ;
 672         time_status &= ~(STA_PPSSIGNAL | STA_PPSJITTER |
 673                          STA_PPSWANDER | STA_PPSERROR);
 674     }
 675     ltemp = time_freq + pps_freq;
 676     if (ltemp < 0)
 677         time_adj -= -ltemp >>
 678             (SHIFT_USEC + SHIFT_HZ - SHIFT_SCALE);
 679     else
 680         time_adj += ltemp >>
 681             (SHIFT_USEC + SHIFT_HZ - SHIFT_SCALE);
 682 
 683 #if HZ == 100
 684     /* compensate for (HZ==100) != 128. Add 25% to get 125; => only 3% error */
 685     if (time_adj < 0)
 686         time_adj -= -time_adj >> 2;
 687     else
 688         time_adj += time_adj >> 2;
 689 #endif
 690 }
 691 
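/*
 * Editor's note (addition, not part of the original file): a rough
 * feel for the phase adjustment above, assuming SHIFT_KG = 6 from
 * <linux/timex.h>. With the default time_constant = 2, each second
 * the remaining offset shrinks by
 *
 *	ltemp = time_offset >> (SHIFT_KG + time_constant)
 *	      = time_offset / 256
 *
 * i.e. the clock slews out roughly 1/256th of the current phase error
 * per second, clamped to MAXPHASE/MINSEC so that large offsets are
 * spread over at least the interval between updates.
 */
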
 692 /*
 693  * disregard lost ticks for now.. We don't care enough.
 694  */
 695 static void timer_bh(void * unused)
 696 {
 697         unsigned long mask;
 698         struct timer_struct *tp;
 699         struct timer_list * timer;
 700 
 701         cli();
 702         while ((timer = timer_head.next) != &timer_head && timer->expires <= jiffies) {
 703                 void (*fn)(unsigned long) = timer->function;
 704                 unsigned long data = timer->data;
 705                 timer->next->prev = timer->prev;
 706                 timer->prev->next = timer->next;
 707                 timer->next = timer->prev = NULL;
 708                 sti();
 709                 fn(data);
 710                 cli();
 711         }
 712         sti();
 713         
 714         for (mask = 1, tp = timer_table+0 ; mask ; tp++,mask += mask) {
 715                 if (mask > timer_active)
 716                         break;
 717                 if (!(mask & timer_active))
 718                         continue;
 719                 if (tp->expires > jiffies)
 720                         continue;
 721                 timer_active &= ~mask;
 722                 tp->fn();
 723                 sti();
 724         }
 725 }
 726 
 727 void tqueue_bh(void * unused)
 728 {
 729         run_task_queue(&tq_timer);
 730 }
 731 
 732 void immediate_bh(void * unused)
 733 {
 734         run_task_queue(&tq_immediate);
 735 }
 736 
 737 void do_timer(struct pt_regs * regs)
 738 {
 739         unsigned long mask;
 740         struct timer_struct *tp;
 741         long ltemp, psecs;
 742 
 743         /* Advance the phase; once it accumulates to at least one
 744          * microsecond, fold the whole microseconds into the tick.
 745          */
 746         time_phase += time_adj;
 747         if (time_phase <= -FINEUSEC) {
 748                 ltemp = -time_phase >> SHIFT_SCALE;
 749                 time_phase += ltemp << SHIFT_SCALE;
 750                 xtime.tv_usec += tick + time_adjust_step - ltemp;
 751         }
 752         else if (time_phase >= FINEUSEC) {
 753                 ltemp = time_phase >> SHIFT_SCALE;
 754                 time_phase -= ltemp << SHIFT_SCALE;
 755                 xtime.tv_usec += tick + time_adjust_step + ltemp;
 756         } else
 757                 xtime.tv_usec += tick + time_adjust_step;
 758 
 759         if (time_adjust) {
 760             /* We are doing an adjtime thing. 
 761              *
 762              * Modify the value of the tick for next time.
 763              * Note that a positive delta means we want the clock
 764              * to run fast. This means that the tick should be bigger.
 765              *
 766              * Limit the amount of the step for *next* tick to be
 767              * in the range -tickadj .. +tickadj
 768              */
 769              if (time_adjust > tickadj)
 770                time_adjust_step = tickadj;
 771              else if (time_adjust < -tickadj)
 772                time_adjust_step = -tickadj;
 773              else
 774                time_adjust_step = time_adjust;
 775              
 776             /* Reduce by this step the amount of time left  */
 777             time_adjust -= time_adjust_step;
 778         }
 779         else
 780             time_adjust_step = 0;
 781 
 782         if (xtime.tv_usec >= 1000000) {
 783             xtime.tv_usec -= 1000000;
 784             xtime.tv_sec++;
 785             second_overflow();
 786         }
 787 
 788         jiffies++;
 789         calc_load();
 790         if (user_mode(regs)) {
 791                 current->utime++;
 792                 if (current->pid) {
 793                         if (current->priority < DEF_PRIORITY)
 794                                 kstat.cpu_nice++;
 795                         else
 796                                 kstat.cpu_user++;
 797                 }
 798                 /* Update ITIMER_VIRT for current task if not in a system call */
 799                 if (current->it_virt_value && !(--current->it_virt_value)) {
 800                         current->it_virt_value = current->it_virt_incr;
 801                         send_sig(SIGVTALRM,current,1);
 802                 }
 803         } else {
 804                 current->stime++;
 805                 if(current->pid)
 806                         kstat.cpu_system++;
 807                 if (prof_buffer && current->pid) {
 808                         extern int _stext;
 809                         unsigned long ip = instruction_pointer(regs);
 810                         ip -= (unsigned long) &_stext;
 811                         ip >>= prof_shift;
 812                         if (ip < prof_len)
 813                                 prof_buffer[ip]++;
 814                 }
 815         }
 816         /*
 817          * check the cpu time limit on the process.
 818          */
 819         if ((current->rlim[RLIMIT_CPU].rlim_max != RLIM_INFINITY) &&
 820             (((current->stime + current->utime) / HZ) >= current->rlim[RLIMIT_CPU].rlim_max))
 821                 send_sig(SIGKILL, current, 1);
 822         if ((current->rlim[RLIMIT_CPU].rlim_cur != RLIM_INFINITY) &&
 823             (((current->stime + current->utime) % HZ) == 0)) {
 824                 psecs = (current->stime + current->utime) / HZ;
 825                 /* send when equal */
 826                 if (psecs == current->rlim[RLIMIT_CPU].rlim_cur)
 827                         send_sig(SIGXCPU, current, 1);
 828                 /* and every five seconds thereafter. */
 829                 else if ((psecs > current->rlim[RLIMIT_CPU].rlim_cur) &&
 830                         ((psecs - current->rlim[RLIMIT_CPU].rlim_cur) % 5) == 0)
 831                         send_sig(SIGXCPU, current, 1);
 832         }
 833 
 834         if (current->pid && 0 > --current->counter) {
 835                 current->counter = 0;
 836                 need_resched = 1;
 837         }
 838         /* Update ITIMER_PROF for the current task */
 839         if (current->it_prof_value && !(--current->it_prof_value)) {
 840                 current->it_prof_value = current->it_prof_incr;
 841                 send_sig(SIGPROF,current,1);
 842         }
 843         for (mask = 1, tp = timer_table+0 ; mask ; tp++,mask += mask) {
 844                 if (mask > timer_active)
 845                         break;
 846                 if (!(mask & timer_active))
 847                         continue;
 848                 if (tp->expires > jiffies)
 849                         continue;
 850                 mark_bh(TIMER_BH);
 851         }
 852         cli();
 853         if (timer_head.next->expires <= jiffies)
 854                 mark_bh(TIMER_BH);
 855         if (tq_timer != &tq_last)
 856                 mark_bh(TQUEUE_BH);
 857         sti();
 858 }
 859 
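/*
 * Editor's note (addition, not part of the original file): a worked
 * example of the adjtime handling above, assuming HZ = 100 (so
 * tick = 10000 us and tickadj = 500/HZ = 5 us). Setting time_adjust
 * to +1000 us makes each subsequent tick advance xtime by 10005 us
 * instead of 10000 us, so the full correction is spread over
 * 1000/5 = 200 ticks, i.e. two seconds of smooth slewing instead of
 * one visible step in the clock.
 */
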
 860 asmlinkage unsigned int sys_alarm(unsigned int seconds)
 861 {
 862         struct itimerval it_new, it_old;
 863         unsigned int oldalarm;
 864 
 865         it_new.it_interval.tv_sec = it_new.it_interval.tv_usec = 0;
 866         it_new.it_value.tv_sec = seconds;
 867         it_new.it_value.tv_usec = 0;
 868         _setitimer(ITIMER_REAL, &it_new, &it_old);
 869         oldalarm = it_old.it_value.tv_sec;
 870         /* ehhh.. We can't return 0 if we have an alarm pending.. */
 871         /* And we'd better return too much than too little anyway */
 872         if (it_old.it_value.tv_usec)
 873                 oldalarm++;
 874         return oldalarm;
 875 }
 876 
 877 asmlinkage int sys_getpid(void)
 878 {
 879         return current->pid;
 880 }
 881 
 882 asmlinkage int sys_getppid(void)
 883 {
 884         return current->p_opptr->pid;
 885 }
 886 
 887 asmlinkage int sys_getuid(void)
 888 {
 889         return current->uid;
 890 }
 891 
 892 asmlinkage int sys_geteuid(void)
 893 {
 894         return current->euid;
 895 }
 896 
 897 asmlinkage int sys_getgid(void)
 898 {
 899         return current->gid;
 900 }
 901 
 902 asmlinkage int sys_getegid(void)
 903 {
 904         return current->egid;
 905 }
 906 
 907 asmlinkage int sys_nice(int increment)
 908 {
 909         unsigned long newprio;
 910         int increase = 0;
 911 
 912         newprio = increment;
 913         if (increment < 0) {
 914                 if (!suser())
 915                         return -EPERM;
 916                 newprio = -increment;
 917                 increase = 1;
 918         }
 919         if (newprio > 40)
 920                 newprio = 40;
 921         /*
 922          * Do a "normalization" of the priority: traditional
 923          * unix nice values are -20..19, but linux doesn't really
 924          * use that kind of thing; it uses the length of the
 925          * timeslice instead (default 150 msec). The rounding is
 926          * why we want to avoid negative values.
 927          */
 928         newprio = (newprio * DEF_PRIORITY + 10) / 20;
 929         increment = newprio;
 930         if (increase)
 931                 increment = -increment;
 932         newprio = current->priority - increment;
 933         if (newprio < 1)
 934                 newprio = 1;
 935         if (newprio > DEF_PRIORITY*2)
 936                 newprio = DEF_PRIORITY*2;
 937         current->priority = newprio;
 938         return 0;
 939 }
 940 
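/*
 * Editor's note (addition, not part of the original file): a worked
 * example of the normalization above, assuming DEF_PRIORITY = 20
 * ticks (HZ = 100). For nice(10) from the default priority:
 *
 *	newprio   = (10*20 + 10) / 20 = 10
 *	priority  = 20 - 10 = 10 ticks	(half the default timeslice)
 *
 * and nice(-10), superuser only, gives priority = 20 + 10 = 30 ticks.
 * The result is always clamped to 1 .. 2*DEF_PRIORITY.
 */
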
 941 static struct task_struct *find_process_by_pid(pid_t pid) {
 942         struct task_struct *p, *q;
 943 
 944         if (pid == 0)
 945                 p = current;
 946         else {
 947                 p = 0;
 948                 for_each_task(q) {
 949                         if (q && q->pid == pid) {
 950                                 p = q;
 951                                 break;
 952                         }
 953                 }
 954         }
 955         return p;
 956 }
 957 
 958 static int setscheduler(pid_t pid, int policy, 
 959                         struct sched_param *param)
 960 {
 961         int error;
 962         struct sched_param lp;
 963         struct task_struct *p;
 964 
 965         if (!param || pid < 0)
 966                 return -EINVAL;
 967 
 968         error = verify_area(VERIFY_READ, param, sizeof(struct sched_param));
 969         if (error)
 970                 return -EINVAL;
 971         memcpy_fromfs(&lp, param, sizeof(struct sched_param));
 972 
 973         p = find_process_by_pid(pid);
 974         if (!p)
 975                 return -ESRCH;
 976                         
 977         if (policy < 0)
 978                 policy = p->policy;
 979         else if (policy != SCHED_FIFO && policy != SCHED_RR &&
 980                  policy != SCHED_OTHER)
 981                 return -EINVAL;
 982         
 983         /*
 984          * Valid priorities for SCHED_FIFO and SCHED_RR are 1..99, valid
 985          * priority for SCHED_OTHER is 0.
 986          */
 987         if (lp.sched_priority < 0 || lp.sched_priority > 99)
 988                 return -EINVAL;
 989         if ((policy == SCHED_OTHER) != (lp.sched_priority == 0))
 990                 return -EINVAL;
 991 
 992         if ((policy == SCHED_FIFO || policy == SCHED_RR) && !suser())
 993                 return -EPERM;
 994         if ((current->euid != p->euid) && (current->euid != p->uid) &&
 995             !suser())
 996                 return -EPERM;
 997 
 998         p->policy = policy;
 999         p->rt_priority = lp.sched_priority;
1000         schedule();
1001 
1002         return 0;
1003 }
1004 
1005 asmlinkage int sys_sched_setscheduler(pid_t pid, int policy, 
1006                                       struct sched_param *param)
1007 {
1008         return setscheduler(pid, policy, param);
1009 }
1010 
1011 asmlinkage int sys_sched_setparam(pid_t pid, struct sched_param *param)
1012 {
1013         return setscheduler(pid, -1, param);
1014 }
1015 
1016 asmlinkage int sys_sched_getscheduler(pid_t pid)
1017 {
1018         struct task_struct *p;
1019 
1020         if (pid < 0)
1021                 return -EINVAL;
1022 
1023         p = find_process_by_pid(pid);
1024         if (!p)
1025                 return -ESRCH;
1026                         
1027         return p->policy;
1028 }
1029 
1030 asmlinkage int sys_sched_getparam(pid_t pid, struct sched_param *param)
1031 {
1032         int error;
1033         struct task_struct *p;
1034         struct sched_param lp;
1035 
1036         if (!param || pid < 0)
1037                 return -EINVAL;
1038 
1039         error = verify_area(VERIFY_WRITE, param, sizeof(struct sched_param));
1040         if (error)
1041                 return -EINVAL;
1042 
1043         p = find_process_by_pid(pid);
1044         if (!p)
1045                 return -ESRCH;
1046 
1047         lp.sched_priority = p->rt_priority;
1048         memcpy_tofs(param, &lp, sizeof(struct sched_param));
1049 
1050         return 0;
1051 }
1052 
1053 asmlinkage int sys_sched_yield(void)
1054 {
1055         /* ... not yet implemented ... */
1056         return -ENOSYS;
1057 }
1058 
1059 asmlinkage int sys_sched_get_priority_max(int policy)
1060 {
1061         switch (policy) {
1062               case SCHED_FIFO:
1063               case SCHED_RR:
1064                 return 99;
1065               case SCHED_OTHER:
1066                 return 0;
1067         }
1068 
1069         return -EINVAL;
1070 }
1071 
1072 asmlinkage int sys_sched_get_priority_min(int policy)
1073 {
1074         switch (policy) {
1075               case SCHED_FIFO:
1076               case SCHED_RR:
1077                 return 1;
1078               case SCHED_OTHER:
1079                 return 0;
1080         }
1081 
1082         return -EINVAL;
1083 }
1084 
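/*
 * Editor's sketch (addition, not part of the original file): how the
 * POSIX scheduling calls above fit together from user space. A
 * portable program queries the priority range instead of hard-coding
 * the 1..99 used here:
 *
 *	#include <sched.h>
 *
 *	struct sched_param sp;
 *	int max = sched_get_priority_max(SCHED_FIFO);
 *
 *	sp.sched_priority = max;
 *	if (sched_setscheduler(0, SCHED_FIFO, &sp) < 0)
 *		perror("sched_setscheduler");	(pid 0 means "current",
 *						 and RT needs superuser)
 */
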
1085 asmlinkage int sys_sched_rr_get_interval(pid_t pid, struct timespec *interval)
1086 {
1087         int error;
1088         struct timespec t;
1089 
1090         error = verify_area(VERIFY_WRITE, interval, sizeof(struct timespec));
1091         if (error)
1092                 return -EINVAL;
1093         
1094         t.tv_sec = 0;
1095         t.tv_nsec = 0;   /* <-- Linus, please fill correct value in here */
1096         return -ENOSYS;  /* and then delete this line. Thanks!           */
1097         memcpy_tofs(interval, &t, sizeof(struct timespec));
1098 
1099         return 0;
1100 }
1101 
1102 static void show_task(int nr,struct task_struct * p)
1103 {
1104         unsigned long free;
1105         static const char * stat_nam[] = { "R", "S", "D", "Z", "T", "W" };
1106 
1107         printk("%-8s %3d ", p->comm, (p == current) ? -nr : nr);
1108         if (((unsigned) p->state) < sizeof(stat_nam)/sizeof(char *))
1109                 printk(stat_nam[p->state]);
1110         else
1111                 printk(" ");
1112 #if ((~0UL) == 0xffffffff)
1113         if (p == current)
1114                 printk(" current  ");
1115         else
1116                 printk(" %08lX ", thread_saved_pc(&p->tss));
1117 #else
1118         if (p == current)
1119                 printk("   current task   ");
1120         else
1121                 printk(" %016lx ", thread_saved_pc(&p->tss));
1122 #endif
1123         for (free = 1; free < PAGE_SIZE/sizeof(long) ; free++) {
1124                 if (((unsigned long *)p->kernel_stack_page)[free])
1125                         break;
1126         }
1127         printk("%5lu %5d %6d ", free*sizeof(long), p->pid, p->p_pptr->pid);
1128         if (p->p_cptr)
1129                 printk("%5d ", p->p_cptr->pid);
1130         else
1131                 printk("      ");
1132         if (p->p_ysptr)
1133                 printk("%7d", p->p_ysptr->pid);
1134         else
1135                 printk("       ");
1136         if (p->p_osptr)
1137                 printk(" %5d\n", p->p_osptr->pid);
1138         else
1139                 printk("\n");
1140 }
1141 
1142 void show_state(void)
1143 {
1144         int i;
1145 
1146 #if ((~0UL) == 0xffffffff)
1147         printk("\n"
1148                "                         free                        sibling\n");
1149         printk("  task             PC    stack   pid father child younger older\n");
1150 #else
1151         printk("\n"
1152                "                                 free                        sibling\n");
1153         printk("  task                 PC        stack   pid father child younger older\n");
1154 #endif
1155         for (i=0 ; i<NR_TASKS ; i++)
1156                 if (task[i])
1157                         show_task(i,task[i]);
1158 }
1159 
1160 void sched_init(void)
1161 {
1162         /*
1163          *      We have to do a little magic to get the first
1164          *      process right in SMP mode.
1165          */
1166         int cpu=smp_processor_id();
1167         current_set[cpu]=&init_task;
1168 #ifdef __SMP__  
1169         init_task.processor=cpu;
1170 #endif
1171         bh_base[TIMER_BH].routine = timer_bh;
1172         bh_base[TQUEUE_BH].routine = tqueue_bh;
1173         bh_base[IMMEDIATE_BH].routine = immediate_bh;
1174         enable_bh(TIMER_BH);
1175         enable_bh(TQUEUE_BH);
1176         enable_bh(IMMEDIATE_BH);
1177 }
