root/arch/i386/math-emu/polynom_Xsig.S

/* [previous][next][first][last][top][bottom][index][help] */
   1 /*---------------------------------------------------------------------------+
   2  |  polynomial_Xsig.S                                                        |
   3  |                                                                           |
   4  | Fixed point arithmetic polynomial evaluation.                             |
   5  |                                                                           |
   6  | Copyright (C) 1992,1993,1994                                              |
   7  |                       W. Metzenthen, 22 Parker St, Ormond, Vic 3163,      |
   8  |                       Australia.  E-mail   billm@vaxc.cc.monash.edu.au    |
   9  |                                                                           |
  10  | Call from C as:                                                           |
  11  |   void polynomial_Xsig(Xsig *accum, unsigned long long x,                 |
  12  |                        unsigned long long terms[], int n)                 |
  13  |                                                                           |
  14  | Computes:                                                                 |
  15  | terms[0] + (terms[1] + (terms[2] + ... + (terms[n-1]*x)*x)*x)*x) ... )*x  |
  16  | and adds the result to the 12 byte Xsig.                                  |
  17  | The terms[] are each 8 bytes, but all computation is performed to 12 byte |
  18  | precision.                                                                |
  19  |                                                                           |
  20  | This function must be used carefully: most overflow of intermediate       |
  21  | results is controlled, but overflow of the result is not.                 |
  22  |                                                                           |
  23  +---------------------------------------------------------------------------*/
  24         .file   "polynomial_Xsig.S"
  25 
  26 #include "fpu_asm.h"
  27 
  28 
  29 #define TERM_SIZE       $8
  30 #define SUM_MS          -20(%ebp)       /* sum ms long */
  31 #define SUM_MIDDLE      -24(%ebp)       /* sum middle long */
  32 #define SUM_LS          -28(%ebp)       /* sum ls long */
  33 #define ACCUM_MS        -4(%ebp)        /* accum ms long */
  34 #define ACCUM_MIDDLE    -8(%ebp)        /* accum middle long */
  35 #define ACCUM_LS        -12(%ebp)       /* accum ls long */
  36 #define OVERFLOWED      -16(%ebp)       /* addition overflow flag */
  37 
  38 .text
  39         .align 2,144
  40 .globl _polynomial_Xsig
  41 _polynomial_Xsig:
  42         pushl   %ebp
  43         movl    %esp,%ebp
  44         subl    $32,%esp
  45         pushl   %esi
  46         pushl   %edi
  47         pushl   %ebx
  48 
  49         movl    PARAM2,%esi             /* x */
  50         movl    PARAM3,%edi             /* terms */
  51 
  52         movl    TERM_SIZE,%eax
  53         mull    PARAM4                  /* n */
  54         addl    %eax,%edi
  55 
  56         movl    4(%edi),%edx            /* terms[n] */
  57         movl    %edx,SUM_MS
  58         movl    (%edi),%edx             /* terms[n] */
  59         movl    %edx,SUM_MIDDLE
  60         xor     %eax,%eax
  61         movl    %eax,SUM_LS
  62         movb    %al,OVERFLOWED
  63 
  64         subl    TERM_SIZE,%edi
  65         decl    PARAM4
  66         js      L_accum_done
  67 
  68 L_accum_loop:
  69         xor     %eax,%eax
  70         movl    %eax,ACCUM_MS
  71         movl    %eax,ACCUM_MIDDLE
  72 
  73         movl    SUM_MIDDLE,%eax
  74         mull    (%esi)                  /* x ls long */
  75         movl    %edx,ACCUM_LS
  76 
  77         movl    SUM_MIDDLE,%eax
  78         mull    4(%esi)                 /* x ms long */
  79         addl    %eax,ACCUM_LS
  80         adcl    %edx,ACCUM_MIDDLE
  81         adcl    $0,ACCUM_MS
  82 
  83         movl    SUM_MS,%eax
  84         mull    (%esi)                  /* x ls long */
  85         addl    %eax,ACCUM_LS
  86         adcl    %edx,ACCUM_MIDDLE
  87         adcl    $0,ACCUM_MS
  88 
  89         movl    SUM_MS,%eax
  90         mull    4(%esi)                 /* x ms long */
  91         addl    %eax,ACCUM_MIDDLE
  92         adcl    %edx,ACCUM_MS
  93 
  94         testb   $0xff,OVERFLOWED
  95         jz      L_no_overflow
  96 
  97         movl    (%esi),%eax
  98         addl    %eax,ACCUM_MIDDLE
  99         movl    4(%esi),%eax
 100         adcl    %eax,ACCUM_MS           /* This could overflow too */
 101 
 102 L_no_overflow:
 103 
 104 /*
 105  * Now put the sum of next term and the accumulator
 106  * into the sum register
 107  */
 108         movl    ACCUM_LS,%eax
 109         addl    (%edi),%eax             /* term ls long */
 110         movl    %eax,SUM_LS
 111         movl    ACCUM_MIDDLE,%eax
 112         adcl    (%edi),%eax             /* term ls long */
 113         movl    %eax,SUM_MIDDLE
 114         movl    ACCUM_MS,%eax
 115         adcl    4(%edi),%eax            /* term ms long */
 116         movl    %eax,SUM_MS
 117         sbbb    %al,%al
 118         movb    %al,OVERFLOWED          /* Used in the next iteration */
 119 
 120         subl    TERM_SIZE,%edi
 121         decl    PARAM4
 122         jns     L_accum_loop
 123 
 124 L_accum_done:
 125         movl    PARAM1,%edi             /* accum */
 126         movl    SUM_LS,%eax
 127         addl    %eax,(%edi)
 128         movl    SUM_MIDDLE,%eax
 129         adcl    %eax,4(%edi)
 130         movl    SUM_MS,%eax
 131         adcl    %eax,8(%edi)
 132 
 133         popl    %ebx
 134         popl    %edi
 135         popl    %esi
 136         leave
 137         ret

/* [previous][next][first][last][top][bottom][index][help] */