linux/kernel/sched.c


DEFINITIONS

This source file includes the following definitions:
  1. add_to_runqueue
  2. del_from_runqueue
  3. move_last_runqueue
  4. wake_up_process
  5. process_timeout
  6. goodness
  7. schedule
  8. sys_pause
  9. wake_up
  10. wake_up_interruptible
  11. __down
  12. __sleep_on
  13. interruptible_sleep_on
  14. sleep_on
  15. add_timer
  16. del_timer
  17. count_active_tasks
  18. calc_load
  19. second_overflow
  20. timer_bh
  21. tqueue_bh
  22. immediate_bh
  23. do_timer
  24. sys_alarm
  25. sys_getpid
  26. sys_getppid
  27. sys_getuid
  28. sys_geteuid
  29. sys_getgid
  30. sys_getegid
  31. sys_nice
  32. find_process_by_pid
  33. setscheduler
  34. sys_sched_setscheduler
  35. sys_sched_setparam
  36. sys_sched_getscheduler
  37. sys_sched_getparam
  38. sys_sched_yield
  39. sys_sched_get_priority_max
  40. sys_sched_get_priority_min
  41. sys_sched_rr_get_interval
  42. show_task
  43. show_state
  44. sched_init

/*
 *  linux/kernel/sched.c
 *
 *  Copyright (C) 1991, 1992  Linus Torvalds
 */

/*
 * 'sched.c' is the main kernel file. It contains scheduling primitives
 * (sleep_on, wakeup, schedule etc) as well as a number of simple system
 * call functions (like getpid(), which just extracts a field from the
 * current task).
 */
  13 
  14 #include <linux/signal.h>
  15 #include <linux/sched.h>
  16 #include <linux/timer.h>
  17 #include <linux/kernel.h>
  18 #include <linux/kernel_stat.h>
  19 #include <linux/fdreg.h>
  20 #include <linux/errno.h>
  21 #include <linux/time.h>
  22 #include <linux/ptrace.h>
  23 #include <linux/delay.h>
  24 #include <linux/interrupt.h>
  25 #include <linux/tqueue.h>
  26 #include <linux/resource.h>
  27 #include <linux/mm.h>
  28 #include <linux/smp.h>
  29 
  30 #include <asm/system.h>
  31 #include <asm/io.h>
  32 #include <asm/segment.h>
  33 #include <asm/pgtable.h>
  34 #include <asm/mmu_context.h>
  35 
  36 #include <linux/timex.h>
  37 
  38 /*
  39  * kernel variables
  40  */
  41 
  42 int securelevel = 0;                    /* system security level */
  43 
  44 long tick = 1000000 / HZ;               /* timer interrupt period */
  45 volatile struct timeval xtime;          /* The current time */
  46 int tickadj = 500/HZ;                   /* microsecs */
  47 
  48 DECLARE_TASK_QUEUE(tq_timer);
  49 DECLARE_TASK_QUEUE(tq_immediate);
  50 DECLARE_TASK_QUEUE(tq_scheduler);
  51 
  52 /*
  53  * phase-lock loop variables
  54  */
  55 int time_state = TIME_BAD;     /* clock synchronization status */
  56 int time_status = STA_UNSYNC | STA_PLL; /* clock status bits */
  57 long time_offset = 0;           /* time adjustment (us) */
  58 long time_constant = 2;         /* pll time constant */
  59 long time_tolerance = MAXFREQ;  /* frequency tolerance (ppm) */
  60 long time_precision = 1;        /* clock precision (us) */
  61 long time_maxerror = 0x70000000;/* maximum error */
  62 long time_esterror = 0x70000000;/* estimated error */
  63 long time_phase = 0;            /* phase offset (scaled us) */
  64 long time_freq = 0;             /* frequency offset (scaled ppm) */
  65 long time_adj = 0;              /* tick adjust (scaled 1 / HZ) */
  66 long time_reftime = 0;          /* time at last adjustment (s) */
  67 
  68 long time_adjust = 0;
  69 long time_adjust_step = 0;
  70 
  71 int need_resched = 0;
  72 unsigned long event = 0;
  73 
  74 extern int _setitimer(int, struct itimerval *, struct itimerval *);
  75 unsigned long * prof_buffer = NULL;
  76 unsigned long prof_len = 0;
  77 unsigned long prof_shift = 0;
  78 
  79 #define _S(nr) (1<<((nr)-1))
  80 
  81 extern void mem_use(void);
  82 
  83 static unsigned long init_kernel_stack[1024] = { STACK_MAGIC, };
  84 unsigned long init_user_stack[1024] = { STACK_MAGIC, };
  85 static struct vm_area_struct init_mmap = INIT_MMAP;
  86 static struct fs_struct init_fs = INIT_FS;
  87 static struct files_struct init_files = INIT_FILES;
  88 static struct signal_struct init_signals = INIT_SIGNALS;
  89 
  90 struct mm_struct init_mm = INIT_MM;
  91 struct task_struct init_task = INIT_TASK;
  92 
  93 unsigned long volatile jiffies=0;
  94 
  95 struct task_struct *current_set[NR_CPUS];
  96 struct task_struct *last_task_used_math = NULL;
  97 
  98 struct task_struct * task[NR_TASKS] = {&init_task, };
  99 
 100 struct kernel_stat kstat = { 0 };
 101 
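/*
 * The run-queue is a circular doubly-linked list threaded through the
 * next_run/prev_run pointers of each task_struct, with init_task (the
 * idle task) acting as the list head: an empty queue is one in which
 * init_task points back at itself.  Tasks are linked in immediately
 * before init_task, and schedule() scans the list starting from
 * init_task.next_run.
 */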
static inline void add_to_runqueue(struct task_struct * p)
{
#if 1   /* sanity tests */
        if (p->next_run || p->prev_run) {
                printk("task already on run-queue\n");
                return;
        }
#endif
        if (p->counter > current->counter + 3)
                need_resched = 1;
        nr_running++;
        (p->prev_run = init_task.prev_run)->next_run = p;
        p->next_run = &init_task;
        init_task.prev_run = p;
}

static inline void del_from_runqueue(struct task_struct * p)
{
        struct task_struct *next = p->next_run;
        struct task_struct *prev = p->prev_run;

#if 1   /* sanity tests */
        if (!next || !prev) {
                printk("task not on run-queue\n");
                return;
        }
#endif
        if (p == &init_task) {
                static int nr = 0;
                if (nr < 5) {
                        nr++;
                        printk("idle task may not sleep\n");
                }
                return;
        }
        nr_running--;
        next->prev_run = prev;
        prev->next_run = next;
        p->next_run = NULL;
        p->prev_run = NULL;
}

static inline void move_last_runqueue(struct task_struct * p)
{
        struct task_struct *next = p->next_run;
        struct task_struct *prev = p->prev_run;

        next->prev_run = prev;
        prev->next_run = next;
        (p->prev_run = init_task.prev_run)->next_run = p;
        p->next_run = &init_task;
        init_task.prev_run = p;
}

/*
 * Wake up a process. Put it on the run-queue if it's not
 * already there.  The "current" process is always on the
 * run-queue (except when the actual re-schedule is in
 * progress), and as such you're allowed to do the simpler
 * "current->state = TASK_RUNNING" to mark yourself runnable
 * without the overhead of this.
 */
inline void wake_up_process(struct task_struct * p)
{
        unsigned long flags;

        save_flags(flags);
        cli();
        p->state = TASK_RUNNING;
        if (!p->next_run)
                add_to_runqueue(p);
        restore_flags(flags);
}

static void process_timeout(unsigned long __data)
{
        struct task_struct * p = (struct task_struct *) __data;

        p->timeout = 0;
        wake_up_process(p);
}

/*
 * This is the function that decides how desirable a process is..
 * You can weigh different processes against each other depending
 * on what CPU they've run on lately etc to try to handle cache
 * and TLB miss penalties.
 *
 * Return values:
 *       -1000: never select this
 *           0: out of time, recalculate counters (but it might still be
 *              selected)
 *         +ve: "goodness" value (the larger, the better)
 *       +1000: realtime process, select this.
 */
static inline int goodness(struct task_struct * p, int this_cpu)
{
        int weight;

#ifdef __SMP__
        /* We are not permitted to run a task someone else is running */
        if (p->processor != NO_PROC_ID)
                return -1000;
#endif

        /*
         * Realtime process, select the first one on the
         * runqueue (taking priorities within processes
         * into account).
         */
        if (p->policy != SCHED_OTHER)
                return 1000 + p->rt_priority;

        /*
         * Give the process a first-approximation goodness value
         * according to the number of clock-ticks it has left.
         *
         * Don't do any other calculations if the time slice is
         * over..
         */
        weight = p->counter;
        if (weight) {

#ifdef __SMP__
                /* Give a largish advantage to the same processor...   */
                /* (this is equivalent to penalizing other processors) */
                if (p->last_processor == this_cpu)
                        weight += PROC_CHANGE_PENALTY;
#endif

                /* .. and a slight advantage to the current process */
                if (p == current)
                        weight += 1;
        }

        return weight;
}
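
/*
 * A feel for the goodness() scale: a SCHED_FIFO task with rt_priority
 * 50 always scores 1050 and beats any SCHED_OTHER task; a SCHED_OTHER
 * task with 5 ticks left scores 5, or 6 if it happens to be the
 * current process; a task that has exhausted its slice scores 0 and
 * is only picked again once every runnable task is at 0 and the
 * counters are recalculated in schedule() below.
 */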

/*
 *  'schedule()' is the scheduler function. It's a very simple and nice
 * scheduler: it's not perfect, but certainly works for most things.
 *
 * The goto is "interesting".
 *
 *   NOTE!!  Task 0 is the 'idle' task, which gets called when no other
 * tasks can run. It can not be killed, and it cannot sleep. The 'state'
 * information in task[0] is never used.
 */
asmlinkage void schedule(void)
{
        int c;
        struct task_struct * p;
        struct task_struct * next;
        unsigned long timeout = 0;
        int this_cpu = smp_processor_id();

/* check alarm, wake up any interruptible tasks that have got a signal */

        if (intr_count) {
                printk("Aiee: scheduling in interrupt\n");
                return;
        }
        run_task_queue(&tq_scheduler);

        need_resched = 0;
        cli();
        /* move an exhausted RR process to be last.. */
        if (!current->counter && current->policy == SCHED_RR) {
                current->counter = current->priority;
                move_last_runqueue(current);
        }
        switch (current->state) {
                case TASK_INTERRUPTIBLE:
                        if (current->signal & ~current->blocked)
                                goto makerunnable;
                        timeout = current->timeout;
                        if (timeout && (timeout <= jiffies)) {
                                current->timeout = 0;
                                timeout = 0;
                makerunnable:
                                current->state = TASK_RUNNING;
                                break;
                        }
                default:
                        del_from_runqueue(current);
                case TASK_RUNNING:
        }
        p = init_task.next_run;
        sti();

#ifdef __SMP__
        /*
         *      This is safe as we do not permit re-entry of schedule()
         */
        current->processor = NO_PROC_ID;
#endif

/*
 * Note! there may appear new tasks on the run-queue during this, as
 * interrupts are enabled. However, they will be put on front of the
 * list, so our list starting at "p" is essentially fixed.
 */
/* this is the scheduler proper: */
        c = -1000;
        next = &init_task;
        while (p != &init_task) {
                int weight = goodness(p, this_cpu);
                if (weight > c)
                        c = weight, next = p;
                p = p->next_run;
        }

        /* if all runnable processes have "counter == 0", re-calculate counters */
        if (!c) {
                for_each_task(p)
                        p->counter = (p->counter >> 1) + p->priority;
        }
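        /*
         * Note on the recalculation above: halving the old counter and
         * adding the static priority rewards tasks that slept through
         * the last epoch, yet bounds every counter below 2*priority
         * (the geometric series p + p/2 + p/4 + ... converges to 2p),
         * so an interactive task gets better response without being
         * able to hoard an unbounded time slice.
         */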
#ifdef __SMP__

        /*
         *      Context switching between two idle threads is pointless.
         */
        if (!current->pid && !next->pid)
                next = current;
        /*
         *      Allocate process to CPU
         */

        next->processor = this_cpu;
        next->last_processor = this_cpu;

#endif
#ifdef __SMP_PROF__
        /* mark processor running an idle thread */
        if (0 == next->pid)
                set_bit(this_cpu, &smp_idle_map);
        else
                clear_bit(this_cpu, &smp_idle_map);
#endif
        if (current != next) {
                struct timer_list timer;

                kstat.context_swtch++;
                if (timeout) {
                        init_timer(&timer);
                        timer.expires = timeout;
                        timer.data = (unsigned long) current;
                        timer.function = process_timeout;
                        add_timer(&timer);
                }
                get_mmu_context(next);
                switch_to(next);
                if (timeout)
                        del_timer(&timer);
        }
}

asmlinkage int sys_pause(void)
{
        current->state = TASK_INTERRUPTIBLE;
        schedule();
        return -ERESTARTNOHAND;
}

/*
 * wake_up doesn't wake up stopped processes - they have to be awakened
 * with signals or similar.
 *
 * Note that this doesn't need cli-sti pairs: interrupts may not change
 * the wait-queue structures directly, but only call wake_up() to wake
 * a process. The process itself must remove the queue once it has woken.
 */
void wake_up(struct wait_queue **q)
{
        struct wait_queue *tmp;
        struct task_struct * p;

        if (!q || !(tmp = *q))
                return;
        do {
                if ((p = tmp->task) != NULL) {
                        if ((p->state == TASK_UNINTERRUPTIBLE) ||
                            (p->state == TASK_INTERRUPTIBLE))
                                wake_up_process(p);
                }
                if (!tmp->next) {
                        printk("wait_queue is bad (eip = %p)\n",
                                __builtin_return_address(0));
                        printk("        q = %p\n", q);
                        printk("       *q = %p\n", *q);
                        printk("      tmp = %p\n", tmp);
                        break;
                }
                tmp = tmp->next;
        } while (tmp != *q);
}

void wake_up_interruptible(struct wait_queue **q)
{
        struct wait_queue *tmp;
        struct task_struct * p;

        if (!q || !(tmp = *q))
                return;
        do {
                if ((p = tmp->task) != NULL) {
                        if (p->state == TASK_INTERRUPTIBLE)
                                wake_up_process(p);
                }
                if (!tmp->next) {
                        printk("wait_queue is bad (eip = %p)\n",
                                __builtin_return_address(0));
                        printk("        q = %p\n", q);
                        printk("       *q = %p\n", *q);
                        printk("      tmp = %p\n", tmp);
                        break;
                }
                tmp = tmp->next;
        } while (tmp != *q);
}

void __down(struct semaphore * sem)
{
        struct wait_queue wait = { current, NULL };
        add_wait_queue(&sem->wait, &wait);
        current->state = TASK_UNINTERRUPTIBLE;
        while (sem->count <= 0) {
                schedule();
                current->state = TASK_UNINTERRUPTIBLE;
        }
        current->state = TASK_RUNNING;
        remove_wait_queue(&sem->wait, &wait);
}
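
/*
 * Note the ordering in __down() above: the task queues itself and
 * marks itself TASK_UNINTERRUPTIBLE *before* testing sem->count, and
 * resets the state again after every schedule().  A wake_up() that
 * slips in between the test and the sleep simply flips the state back
 * to TASK_RUNNING, so the wakeup cannot be lost.
 */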

static inline void __sleep_on(struct wait_queue **p, int state)
{
        unsigned long flags;
        struct wait_queue wait = { current, NULL };

        if (!p)
                return;
        if (current == task[0])
                panic("task[0] trying to sleep");
        current->state = state;
        add_wait_queue(p, &wait);
        save_flags(flags);
        sti();
        schedule();
        remove_wait_queue(p, &wait);
        restore_flags(flags);
}

void interruptible_sleep_on(struct wait_queue **p)
{
        __sleep_on(p, TASK_INTERRUPTIBLE);
}

void sleep_on(struct wait_queue **p)
{
        __sleep_on(p, TASK_UNINTERRUPTIBLE);
}

/*
 * The head of the timer-list has an "expires" field of MAX_UINT,
 * and the sorting routine counts on this..
 */
static struct timer_list timer_head = { &timer_head, &timer_head, ~0, 0, NULL };
#define SLOW_BUT_DEBUGGING_TIMERS 1

void add_timer(struct timer_list * timer)
{
        unsigned long flags;
        struct timer_list *p;

#if SLOW_BUT_DEBUGGING_TIMERS
        if (timer->next || timer->prev) {
                printk("add_timer() called with non-zero list from %p\n",
                        __builtin_return_address(0));
                return;
        }
#endif
        p = &timer_head;
        save_flags(flags);
        cli();
        do {
                p = p->next;
        } while (timer->expires > p->expires);
        timer->next = p;
        timer->prev = p->prev;
        p->prev = timer;
        timer->prev->next = timer;
        restore_flags(flags);
}
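
/*
 * The ~0 "expires" sentinel in timer_head is what lets the insertion
 * loop above run without a NULL check: the scan is guaranteed to stop
 * at the head even for the largest real expiry value.  A minimal usage
 * sketch (the callback name and one-second delay are illustrative
 * only):
 *
 *      static void my_callback(unsigned long data)
 *      {
 *              printk("timer fired, data=%lu\n", data);
 *      }
 *
 *      struct timer_list my_timer;
 *
 *      init_timer(&my_timer);
 *      my_timer.expires = jiffies + HZ;        -- one second from now
 *      my_timer.data = 42;
 *      my_timer.function = my_callback;
 *      add_timer(&my_timer);
 *      ...
 *      del_timer(&my_timer);                   -- if it hasn't fired yet
 */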

int del_timer(struct timer_list * timer)
{
        unsigned long flags;
#if SLOW_BUT_DEBUGGING_TIMERS
        struct timer_list * p;

        p = &timer_head;
        save_flags(flags);
        cli();
        while ((p = p->next) != &timer_head) {
                if (p == timer) {
                        timer->next->prev = timer->prev;
                        timer->prev->next = timer->next;
                        timer->next = timer->prev = NULL;
                        restore_flags(flags);
                        return 1;
                }
        }
        if (timer->next || timer->prev)
                printk("del_timer() called from %p with timer not initialized\n",
                        __builtin_return_address(0));
        restore_flags(flags);
        return 0;
#else
        save_flags(flags);
        cli();
        if (timer->next) {
                timer->next->prev = timer->prev;
                timer->prev->next = timer->next;
                timer->next = timer->prev = NULL;
                restore_flags(flags);
                return 1;
        }
        restore_flags(flags);
        return 0;
#endif
}

unsigned long timer_active = 0;
struct timer_struct timer_table[32];

/*
 * Hmm.. Changed this, as the GNU make sources (load.c) seem to
 * imply that avenrun[] is the standard name for this kind of thing.
 * Nothing else seems to be standardized: the fractional size etc
 * all seem to differ on different machines.
 */
unsigned long avenrun[3] = { 0,0,0 };

/*
 * Nr of active tasks - counted in fixed-point numbers
 */
static unsigned long count_active_tasks(void)
{
        struct task_struct **p;
        unsigned long nr = 0;

        for (p = &LAST_TASK; p > &FIRST_TASK; --p)
                if (*p && ((*p)->state == TASK_RUNNING ||
                           (*p)->state == TASK_UNINTERRUPTIBLE ||
                           (*p)->state == TASK_SWAPPING))
                        nr += FIXED_1;
#ifdef __SMP__
        nr -= (smp_num_cpus-1)*FIXED_1;
#endif
        return nr;
}

static inline void calc_load(void)
{
        unsigned long active_tasks; /* fixed-point */
        static int count = LOAD_FREQ;

        if (count-- > 0)
                return;
        count = LOAD_FREQ;
        active_tasks = count_active_tasks();
        CALC_LOAD(avenrun[0], EXP_1, active_tasks);
        CALC_LOAD(avenrun[1], EXP_5, active_tasks);
        CALC_LOAD(avenrun[2], EXP_15, active_tasks);
}
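
/*
 * CALC_LOAD computes a fixed-point exponentially-decayed average:
 * load = (load * exp + n * (FIXED_1 - exp)) >> FSHIFT.  Assuming the
 * usual constants from <linux/sched.h> (FSHIFT = 11, FIXED_1 = 2048,
 * EXP_1 = 1884, LOAD_FREQ = 5*HZ), one runnable task starting from an
 * idle machine gives, after the first 5-second interval:
 *
 *      avenrun[0] = (0*1884 + 1*2048*(2048-1884)) >> 11 = 164
 *
 * i.e. 164/2048 = 0.08, creeping up toward 1.00 on later intervals.
 */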

/*
 * this routine handles the overflow of the microsecond field
 *
 * The tricky bits of code to handle the accurate clock support
 * were provided by Dave Mills (Mills@UDEL.EDU) of NTP fame.
 * They were originally developed for SUN and DEC kernels.
 * All the kudos should go to Dave for this stuff.
 *
 */
static void second_overflow(void)
{
    long ltemp;

    /* Bump the maxerror field */
    time_maxerror = (0x70000000-time_maxerror <
                     time_tolerance >> SHIFT_USEC) ?
        0x70000000 : (time_maxerror + (time_tolerance >> SHIFT_USEC));

    /*
     * Leap second processing. If in leap-insert state at
     * the end of the day, the system clock is set back one
     * second; if in leap-delete state, the system clock is
     * set ahead one second. The microtime() routine or
     * external clock driver will ensure that reported time
     * is always monotonic. The ugly divides should be
     * replaced.
     */
    switch (time_state) {

    case TIME_OK:
        if (time_status & STA_INS)
            time_state = TIME_INS;
        else if (time_status & STA_DEL)
            time_state = TIME_DEL;
        break;

    case TIME_INS:
        if (xtime.tv_sec % 86400 == 0) {
            xtime.tv_sec--;
            time_state = TIME_OOP;
            printk("Clock: inserting leap second 23:59:60 UTC\n");
        }
        break;

    case TIME_DEL:
        if ((xtime.tv_sec + 1) % 86400 == 0) {
            xtime.tv_sec++;
            time_state = TIME_WAIT;
            printk("Clock: deleting leap second 23:59:59 UTC\n");
        }
        break;

    case TIME_OOP:
        time_state = TIME_WAIT;
        break;

    case TIME_WAIT:
        if (!(time_status & (STA_INS | STA_DEL)))
            time_state = TIME_OK;
    }

    /*
     * Compute the phase adjustment for the next second. In
     * PLL mode, the offset is reduced by a fixed factor
     * times the time constant. In FLL mode the offset is
     * used directly. In either mode, the maximum phase
     * adjustment for each second is clamped so as to spread
     * the adjustment over not more than the number of
     * seconds between updates.
     */
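    /*
     * For a feel of the numbers: in PLL mode with the default
     * time_constant of 2 (and assuming the usual SHIFT_KG of 6 from
     * <linux/timex.h>), the remaining offset is divided by
     * 2^(6+2) = 256 each second, so a 10 ms offset is slewed by
     * roughly 39 us in the first second, decaying geometrically
     * thereafter, subject to the MAXPHASE/MINSEC clamp below.
     */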
    if (time_offset < 0) {
        ltemp = -time_offset;
        if (!(time_status & STA_FLL))
            ltemp >>= SHIFT_KG + time_constant;
        if (ltemp > (MAXPHASE / MINSEC) << SHIFT_UPDATE)
            ltemp = (MAXPHASE / MINSEC) <<
                SHIFT_UPDATE;
        time_offset += ltemp;
        time_adj = -ltemp << (SHIFT_SCALE - SHIFT_HZ -
                              SHIFT_UPDATE);
    } else {
        ltemp = time_offset;
        if (!(time_status & STA_FLL))
            ltemp >>= SHIFT_KG + time_constant;
        if (ltemp > (MAXPHASE / MINSEC) << SHIFT_UPDATE)
            ltemp = (MAXPHASE / MINSEC) <<
                SHIFT_UPDATE;
        time_offset -= ltemp;
        time_adj = ltemp << (SHIFT_SCALE - SHIFT_HZ -
                             SHIFT_UPDATE);
    }

    /*
     * Compute the frequency estimate and additional phase
     * adjustment due to frequency error for the next
     * second. When the PPS signal is engaged, gnaw on the
     * watchdog counter and update the frequency computed by
     * the pll and the PPS signal.
     */
    pps_valid++;
    if (pps_valid == PPS_VALID) {
        pps_jitter = MAXTIME;
        pps_stabil = MAXFREQ;
        time_status &= ~(STA_PPSSIGNAL | STA_PPSJITTER |
                         STA_PPSWANDER | STA_PPSERROR);
    }
    ltemp = time_freq + pps_freq;
    if (ltemp < 0)
        time_adj -= -ltemp >>
            (SHIFT_USEC + SHIFT_HZ - SHIFT_SCALE);
    else
        time_adj += ltemp >>
            (SHIFT_USEC + SHIFT_HZ - SHIFT_SCALE);

#if HZ == 100
    /* compensate for (HZ==100) != 128. Add 25% to get 125; => only 3% error */
    if (time_adj < 0)
        time_adj -= -time_adj >> 2;
    else
        time_adj += time_adj >> 2;
#endif
}

/*
 * disregard lost ticks for now.. We don't care enough.
 */
static void timer_bh(void * unused)
{
        unsigned long mask;
        struct timer_struct *tp;
        struct timer_list * timer;

        cli();
        while ((timer = timer_head.next) != &timer_head && timer->expires <= jiffies) {
                void (*fn)(unsigned long) = timer->function;
                unsigned long data = timer->data;
                timer->next->prev = timer->prev;
                timer->prev->next = timer->next;
                timer->next = timer->prev = NULL;
                sti();
                fn(data);
                cli();
        }
        sti();

        for (mask = 1, tp = timer_table+0 ; mask ; tp++, mask += mask) {
                if (mask > timer_active)
                        break;
                if (!(mask & timer_active))
                        continue;
                if (tp->expires > jiffies)
                        continue;
                timer_active &= ~mask;
                tp->fn();
                sti();
        }
}

void tqueue_bh(void * unused)
{
        run_task_queue(&tq_timer);
}

void immediate_bh(void * unused)
{
        run_task_queue(&tq_immediate);
}

void do_timer(struct pt_regs * regs)
{
        unsigned long mask;
        struct timer_struct *tp;
        long ltemp, psecs;
#ifdef  __SMP_PROF__
        int cpu, i;
#endif

        /* Advance the phase; once it accumulates to a whole microsecond,
         * fold that amount into this tick's advance as well.
         */
        time_phase += time_adj;
        if (time_phase <= -FINEUSEC) {
                ltemp = -time_phase >> SHIFT_SCALE;
                time_phase += ltemp << SHIFT_SCALE;
                xtime.tv_usec += tick + time_adjust_step - ltemp;
        }
        else if (time_phase >= FINEUSEC) {
                ltemp = time_phase >> SHIFT_SCALE;
                time_phase -= ltemp << SHIFT_SCALE;
                xtime.tv_usec += tick + time_adjust_step + ltemp;
        } else
                xtime.tv_usec += tick + time_adjust_step;

        if (time_adjust) {
            /* We are doing an adjtime thing.
             *
             * Modify the value of the tick for next time.
             * Note that a positive delta means we want the clock
             * to run fast. This means that the tick should be bigger.
             *
             * Limit the amount of the step for *next* tick to be
             * in the range -tickadj .. +tickadj
             */
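             /* Concretely: with HZ=100 the base tick is 10000 us and
              * tickadj is 500/HZ = 5 us, so an adjtime() request of
              * +1000 us is applied 5 us per tick and completes after
              * 200 ticks, i.e. about two seconds of gentle slewing
              * rather than one visible step.
              */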
             if (time_adjust > tickadj)
                time_adjust_step = tickadj;
             else if (time_adjust < -tickadj)
                time_adjust_step = -tickadj;
             else
                time_adjust_step = time_adjust;

            /* Reduce by this step the amount of time left  */
            time_adjust -= time_adjust_step;
        }
        else
            time_adjust_step = 0;

        if (xtime.tv_usec >= 1000000) {
            xtime.tv_usec -= 1000000;
            xtime.tv_sec++;
            second_overflow();
        }

        jiffies++;
        calc_load();
#ifdef  __SMP_PROF__
        smp_idle_count[NR_CPUS]++;    /* count timer ticks */
        cpu = smp_processor_id();
        for (i = 0; i < (0 == smp_num_cpus ? 1 : smp_num_cpus); i++)
                if (test_bit(i, &smp_idle_map)) smp_idle_count[i]++;
#endif
        if (user_mode(regs)) {
                current->utime++;
                if (current->pid) {
                        if (current->priority < DEF_PRIORITY)
                                kstat.cpu_nice++;
                        else
                                kstat.cpu_user++;
                }
                /* Update ITIMER_VIRT for current task if not in a system call */
                if (current->it_virt_value && !(--current->it_virt_value)) {
                        current->it_virt_value = current->it_virt_incr;
                        send_sig(SIGVTALRM, current, 1);
                }
        } else {
                current->stime++;
                if (current->pid)
                        kstat.cpu_system++;
                if (prof_buffer && current->pid) {
                        extern int _stext;
                        unsigned long ip = instruction_pointer(regs);
                        ip -= (unsigned long) &_stext;
                        ip >>= prof_shift;
                        if (ip < prof_len)
                                prof_buffer[ip]++;
                }
        }
        /*
         * check the cpu time limit on the process.
         */
        if ((current->rlim[RLIMIT_CPU].rlim_max != RLIM_INFINITY) &&
            (((current->stime + current->utime) / HZ) >= current->rlim[RLIMIT_CPU].rlim_max))
                send_sig(SIGKILL, current, 1);
        if ((current->rlim[RLIMIT_CPU].rlim_cur != RLIM_INFINITY) &&
            (((current->stime + current->utime) % HZ) == 0)) {
                psecs = (current->stime + current->utime) / HZ;
                /* send when equal */
                if (psecs == current->rlim[RLIMIT_CPU].rlim_cur)
                        send_sig(SIGXCPU, current, 1);
                /* and every five seconds thereafter. */
                else if ((psecs > current->rlim[RLIMIT_CPU].rlim_cur) &&
                        ((psecs - current->rlim[RLIMIT_CPU].rlim_cur) % 5) == 0)
                        send_sig(SIGXCPU, current, 1);
        }

        if (current->pid && 0 > --current->counter) {
                current->counter = 0;
                need_resched = 1;
        }
        /* Update ITIMER_PROF for the current task */
        if (current->it_prof_value && !(--current->it_prof_value)) {
                current->it_prof_value = current->it_prof_incr;
                send_sig(SIGPROF, current, 1);
        }
        for (mask = 1, tp = timer_table+0 ; mask ; tp++, mask += mask) {
                if (mask > timer_active)
                        break;
                if (!(mask & timer_active))
                        continue;
                if (tp->expires > jiffies)
                        continue;
                mark_bh(TIMER_BH);
        }
        cli();
        if (timer_head.next->expires <= jiffies)
                mark_bh(TIMER_BH);
        if (tq_timer != &tq_last)
                mark_bh(TQUEUE_BH);
        sti();
}

asmlinkage unsigned int sys_alarm(unsigned int seconds)
{
        struct itimerval it_new, it_old;
        unsigned int oldalarm;

        it_new.it_interval.tv_sec = it_new.it_interval.tv_usec = 0;
        it_new.it_value.tv_sec = seconds;
        it_new.it_value.tv_usec = 0;
        _setitimer(ITIMER_REAL, &it_new, &it_old);
        oldalarm = it_old.it_value.tv_sec;
        /* ehhh.. We can't return 0 if we have an alarm pending.. */
        /* And we'd better return too much than too little anyway */
        if (it_old.it_value.tv_usec)
                oldalarm++;
        return oldalarm;
}

asmlinkage int sys_getpid(void)
{
        return current->pid;
}

asmlinkage int sys_getppid(void)
{
        return current->p_opptr->pid;
}

asmlinkage int sys_getuid(void)
{
        return current->uid;
}

asmlinkage int sys_geteuid(void)
{
        return current->euid;
}

asmlinkage int sys_getgid(void)
{
        return current->gid;
}

asmlinkage int sys_getegid(void)
{
        return current->egid;
}

asmlinkage int sys_nice(int increment)
{
        unsigned long newprio;
        int increase = 0;

        newprio = increment;
        if (increment < 0) {
                if (!suser())
                        return -EPERM;
                newprio = -increment;
                increase = 1;
        }
        if (newprio > 40)
                newprio = 40;
        /*
         * do a "normalization" of the priority (traditionally
         * unix nice values are -20..20, linux doesn't really
         * use that kind of thing, but uses the length of the
         * timeslice instead (default 150 msec). The rounding is
         * why we want to avoid negative values.
         */
        newprio = (newprio * DEF_PRIORITY + 10) / 20;
        increment = newprio;
        if (increase)
                increment = -increment;
        newprio = current->priority - increment;
        if (newprio < 1)
                newprio = 1;
        if (newprio > DEF_PRIORITY*2)
                newprio = DEF_PRIORITY*2;
        current->priority = newprio;
        return 0;
}
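
/*
 * Worked arithmetic for the normalization above, assuming the common
 * DEF_PRIORITY of 20 ticks (HZ = 100): nice(10) maps through
 * (10*20 + 10)/20 = 10, so a task at the default priority drops from
 * 20 to 10 ticks per timeslice; nice(-10) from the superuser raises it
 * from 20 to 30; the result is always clamped to 1..2*DEF_PRIORITY.
 */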

static struct task_struct *find_process_by_pid(pid_t pid)
{
        struct task_struct *p, *q;

        if (pid == 0)
                p = current;
        else {
                p = NULL;
                for_each_task(q) {
                        if (q && q->pid == pid) {
                                p = q;
                                break;
                        }
                }
        }
        return p;
}

static int setscheduler(pid_t pid, int policy,
                        struct sched_param *param)
{
        int error;
        struct sched_param lp;
        struct task_struct *p;

        if (!param || pid < 0)
                return -EINVAL;

        error = verify_area(VERIFY_READ, param, sizeof(struct sched_param));
        if (error)
                return error;
        memcpy_fromfs(&lp, param, sizeof(struct sched_param));

        p = find_process_by_pid(pid);
        if (!p)
                return -ESRCH;

        if (policy < 0)
                policy = p->policy;
        else if (policy != SCHED_FIFO && policy != SCHED_RR &&
                 policy != SCHED_OTHER)
                return -EINVAL;

        /*
         * Valid priorities for SCHED_FIFO and SCHED_RR are 1..99, valid
         * priority for SCHED_OTHER is 0.
         */
        if (lp.sched_priority < 0 || lp.sched_priority > 99)
                return -EINVAL;
        if ((policy == SCHED_OTHER) != (lp.sched_priority == 0))
                return -EINVAL;

        if ((policy == SCHED_FIFO || policy == SCHED_RR) && !suser())
                return -EPERM;
        if ((current->euid != p->euid) && (current->euid != p->uid) &&
            !suser())
                return -EPERM;

        p->policy = policy;
        p->rt_priority = lp.sched_priority;
        schedule();

        return 0;
}
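
/*
 * A user-space sketch of driving the syscall above (illustrative only;
 * error handling abbreviated): make the calling process a round-robin
 * real-time task at priority 50.
 *
 *      struct sched_param param;
 *
 *      param.sched_priority = 50;
 *      if (sched_setscheduler(0, SCHED_RR, &param) < 0)
 *              perror("sched_setscheduler");
 *
 * pid 0 means "the calling process" (see find_process_by_pid), and
 * anything other than SCHED_OTHER requires superuser privilege.
 */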

asmlinkage int sys_sched_setscheduler(pid_t pid, int policy,
                                      struct sched_param *param)
{
        return setscheduler(pid, policy, param);
}

asmlinkage int sys_sched_setparam(pid_t pid, struct sched_param *param)
{
        return setscheduler(pid, -1, param);
}

asmlinkage int sys_sched_getscheduler(pid_t pid)
{
        struct task_struct *p;

        if (pid < 0)
                return -EINVAL;

        p = find_process_by_pid(pid);
        if (!p)
                return -ESRCH;

        return p->policy;
}

asmlinkage int sys_sched_getparam(pid_t pid, struct sched_param *param)
{
        int error;
        struct task_struct *p;
        struct sched_param lp;

        if (!param || pid < 0)
                return -EINVAL;

        error = verify_area(VERIFY_WRITE, param, sizeof(struct sched_param));
        if (error)
                return error;

        p = find_process_by_pid(pid);
        if (!p)
                return -ESRCH;

        lp.sched_priority = p->rt_priority;
        memcpy_tofs(param, &lp, sizeof(struct sched_param));

        return 0;
}

asmlinkage int sys_sched_yield(void)
{
        /* ... not yet implemented ... */
        return -ENOSYS;
}

asmlinkage int sys_sched_get_priority_max(int policy)
{
        switch (policy) {
              case SCHED_FIFO:
              case SCHED_RR:
                return 99;
              case SCHED_OTHER:
                return 0;
        }

        return -EINVAL;
}

asmlinkage int sys_sched_get_priority_min(int policy)
{
        switch (policy) {
              case SCHED_FIFO:
              case SCHED_RR:
                return 1;
              case SCHED_OTHER:
                return 0;
        }

        return -EINVAL;
}

asmlinkage int sys_sched_rr_get_interval(pid_t pid, struct timespec *interval)
{
        int error;
        struct timespec t;

        error = verify_area(VERIFY_WRITE, interval, sizeof(struct timespec));
        if (error)
                return error;

        t.tv_sec = 0;
        t.tv_nsec = 0;   /* <-- Linus, please fill correct value in here */
        return -ENOSYS;  /* and then delete this line. Thanks!           */
        memcpy_tofs(interval, &t, sizeof(struct timespec));

        return 0;
}

static void show_task(int nr, struct task_struct * p)
{
        unsigned long free;
        static const char * stat_nam[] = { "R", "S", "D", "Z", "T", "W" };

        printk("%-8s %3d ", p->comm, (p == current) ? -nr : nr);
        if (((unsigned) p->state) < sizeof(stat_nam)/sizeof(char *))
                printk(stat_nam[p->state]);
        else
                printk(" ");
#if ((~0UL) == 0xffffffff)
        if (p == current)
                printk(" current  ");
        else
                printk(" %08lX ", thread_saved_pc(&p->tss));
#else
        if (p == current)
                printk("   current task   ");
        else
                printk(" %016lx ", thread_saved_pc(&p->tss));
#endif
        for (free = 1; free < PAGE_SIZE/sizeof(long) ; free++) {
                if (((unsigned long *)p->kernel_stack_page)[free])
                        break;
        }
        printk("%5lu %5d %6d ", free*sizeof(long), p->pid, p->p_pptr->pid);
        if (p->p_cptr)
                printk("%5d ", p->p_cptr->pid);
        else
                printk("      ");
        if (p->p_ysptr)
                printk("%7d", p->p_ysptr->pid);
        else
                printk("       ");
        if (p->p_osptr)
                printk(" %5d\n", p->p_osptr->pid);
        else
                printk("\n");
}

void show_state(void)
{
        int i;

#if ((~0UL) == 0xffffffff)
        printk("\n"
               "                         free                        sibling\n");
        printk("  task             PC    stack   pid father child younger older\n");
#else
        printk("\n"
               "                                 free                        sibling\n");
        printk("  task                 PC        stack   pid father child younger older\n");
#endif
        for (i = 0; i < NR_TASKS; i++)
                if (task[i])
                        show_task(i, task[i]);
}

void sched_init(void)
{
        /*
         *      We have to do a little magic to get the first
         *      process right in SMP mode.
         */
        int cpu = smp_processor_id();
        current_set[cpu] = &init_task;
#ifdef __SMP__
        init_task.processor = cpu;
#endif
        bh_base[TIMER_BH].routine = timer_bh;
        bh_base[TQUEUE_BH].routine = tqueue_bh;
        bh_base[IMMEDIATE_BH].routine = immediate_bh;
        enable_bh(TIMER_BH);
        enable_bh(TQUEUE_BH);
        enable_bh(IMMEDIATE_BH);
}
