	.file	"wm_shrx.S"
/*---------------------------------------------------------------------------+
 |  wm_shrx.S                                                                 |
 |                                                                            |
 |    64 bit right shift functions                                            |
 |                                                                            |
 | Copyright (C) 1992,1995                                                    |
 |                       W. Metzenthen, 22 Parker St, Ormond, Vic 3163,       |
 |                       Australia.  E-mail  billm@jacobi.maths.monash.edu.au |
 |                                                                            |
 | Call from C as:                                                            |
 |   unsigned shrx(void *arg1, unsigned arg2)                                 |
 | and                                                                        |
 |   unsigned shrxs(void *arg1, unsigned arg2)                                |
 |                                                                            |
 +---------------------------------------------------------------------------*/

#include "fpu_emu.h"

.text
/*---------------------------------------------------------------------------+
 |   unsigned shrx(void *arg1, unsigned arg2)                                 |
 |                                                                            |
 |   Extended shift right function.                                           |
 |   Fastest for small shifts.                                                |
 |   Shifts the 64 bit quantity pointed to by the first arg (arg1)            |
 |   right by the number of bits specified by the second arg (arg2).          |
 |   Forms a 96 bit quantity from the 64 bit arg and eax:                     |
 |                [ 64 bit arg ][ eax ]                                       |
 |            shift right --------->                                          |
 |   The eax register is initialized to 0 before the shifting.                |
 |   Results returned in the 64 bit arg and eax.                              |
 +---------------------------------------------------------------------------*/

ENTRY(shrx)
	push	%ebp
	movl	%esp,%ebp
	pushl	%esi
	movl	PARAM2,%ecx
	movl	PARAM1,%esi
	cmpl	$32,%ecx	/* shrd only works for 0..31 bits */
	jnc	L_more_than_31

/* less than 32 bits: shift by [0..31] */
	pushl	%ebx
	movl	(%esi),%ebx	/* lsl */
	movl	4(%esi),%edx	/* msl */
	xorl	%eax,%eax	/* extension */
	shrd	%cl,%ebx,%eax
	shrd	%cl,%edx,%ebx
	shr	%cl,%edx
	movl	%ebx,(%esi)
	movl	%edx,4(%esi)
	popl	%ebx
	popl	%esi
	leave
	ret

L_more_than_31:
	cmpl	$64,%ecx
	jnc	L_more_than_63

/* Shift by [32..63] bits */
	subb	$32,%cl
	movl	(%esi),%eax	/* lsl */
	movl	4(%esi),%edx	/* msl */
	shrd	%cl,%edx,%eax
	shr	%cl,%edx
	movl	%edx,(%esi)
	movl	$0,4(%esi)
	popl	%esi
	leave
	ret

L_more_than_63:
	cmpl	$96,%ecx
	jnc	L_more_than_95

/* Shift by [64..95] bits */
	subb	$64,%cl
	movl	4(%esi),%eax	/* msl */
	shr	%cl,%eax
	xorl	%edx,%edx
	movl	%edx,(%esi)
	movl	%edx,4(%esi)
	popl	%esi
	leave
	ret

L_more_than_95:
/* Shift by [96..inf) bits */
	xorl	%eax,%eax
	movl	%eax,(%esi)
	movl	%eax,4(%esi)
	popl	%esi
	leave
	ret
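
/* Reference only, never assembled: a minimal C sketch of what shrx computes,
   assuming the little-endian 32 bit split of the 64 bit arg used above (lsl
   at offset 0, msl at offset 4).  The name shrx_model and the unsigned long
   long type are illustrative and are not part of the emulator. */
#if 0
static unsigned shrx_model(unsigned long long *arg, unsigned nr)
{
	unsigned long long val = *arg;
	unsigned eax;	/* low 32 bits of the shifted 96 bit quantity */

	if (nr == 0)
		eax = 0;				/* nothing shifted out */
	else if (nr < 32)
		eax = (unsigned)(val << (32 - nr));	/* low nr bits of arg, at the top of eax */
	else if (nr < 96)
		eax = (unsigned)(val >> (nr - 32));	/* bits nr-32..nr-1 of arg */
	else
		eax = 0;				/* everything shifted below eax */

	*arg = (nr < 64) ? val >> nr : 0;
	return eax;
}
#endif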

/*---------------------------------------------------------------------------+
 |   unsigned shrxs(void *arg1, unsigned arg2)                                |
 |                                                                            |
 |   Extended shift right function (optimized for small floating point       |
 |   integers).                                                               |
 |   Shifts the 64 bit quantity pointed to by the first arg (arg1)            |
 |   right by the number of bits specified by the second arg (arg2).          |
 |   Forms a 96 bit quantity from the 64 bit arg and eax:                     |
 |                [ 64 bit arg ][ eax ]                                       |
 |            shift right --------->                                          |
 |   The eax register is initialized to 0 before the shifting.                |
 |   The lower 8 bits of eax are lost and replaced by a flag which is         |
 |   set (to 0x01) if any bit, apart from the first one, is set in the        |
 |   part which has been shifted out of the arg.                              |
 |   Results returned in the 64 bit arg and eax.                              |
 +---------------------------------------------------------------------------*/
ENTRY(shrxs)
	push	%ebp
	movl	%esp,%ebp
	pushl	%esi
	pushl	%ebx
	movl	PARAM2,%ecx
	movl	PARAM1,%esi
	cmpl	$64,%ecx	/* shrd only works for 0..31 bits */
	jnc	Ls_more_than_63

	cmpl	$32,%ecx	/* shrd only works for 0..31 bits */
	jc	Ls_less_than_32

/* We got here without jumps by assuming that the most common requirement
   is for small integers */
/* Shift by [32..63] bits */
	subb	$32,%cl
	movl	(%esi),%eax	/* lsl */
	movl	4(%esi),%edx	/* msl */
	xorl	%ebx,%ebx
	shrd	%cl,%eax,%ebx
	shrd	%cl,%edx,%eax
	shr	%cl,%edx
	orl	%ebx,%ebx	/* test these 32 bits */
	setne	%bl
	test	$0x7fffffff,%eax	/* and 31 bits here */
	setne	%bh
	orw	%bx,%bx		/* Any of the 63 bits set? */
	setne	%al
	movl	%edx,(%esi)
	movl	$0,4(%esi)
	popl	%ebx
	popl	%esi
	leave
	ret

/* Shift by [0..31] bits */
Ls_less_than_32:
	movl	(%esi),%ebx	/* lsl */
	movl	4(%esi),%edx	/* msl */
	xorl	%eax,%eax	/* extension */
	shrd	%cl,%ebx,%eax
	shrd	%cl,%edx,%ebx
	shr	%cl,%edx
	test	$0x7fffffff,%eax	/* only need to look at eax here */
	setne	%al
	movl	%ebx,(%esi)
	movl	%edx,4(%esi)
	popl	%ebx
	popl	%esi
	leave
	ret

/* Shift by [64..95] bits */
Ls_more_than_63:
	cmpl	$96,%ecx
	jnc	Ls_more_than_95

	subb	$64,%cl
	movl	(%esi),%ebx	/* lsl */
	movl	4(%esi),%eax	/* msl */
	xorl	%edx,%edx	/* extension */
	shrd	%cl,%ebx,%edx
	shrd	%cl,%eax,%ebx
	shr	%cl,%eax
	orl	%ebx,%edx
	setne	%bl
	test	$0x7fffffff,%eax	/* only need to look at eax here */
	setne	%bh
	orw	%bx,%bx
	setne	%al
	xorl	%edx,%edx
	movl	%edx,(%esi)	/* set to zero */
	movl	%edx,4(%esi)	/* set to zero */
	popl	%ebx
	popl	%esi
	leave
	ret

Ls_more_than_95:
/* Shift by [96..inf) bits */
	xorl	%eax,%eax
	movl	(%esi),%ebx
	orl	4(%esi),%ebx
	setne	%al
	xorl	%ebx,%ebx
	movl	%ebx,(%esi)
	movl	%ebx,4(%esi)
	popl	%ebx
	popl	%esi
	leave
	ret
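
/* Reference only, never assembled: a minimal C sketch written to mirror the
   shrxs code above.  The flag in the low 8 bits of the returned value is set
   when any bit below the most significant shifted-out bit is lost; for shifts
   of 96 or more the code above folds every bit of the arg into the flag.  The
   name shrxs_model and the unsigned long long type are illustrative and are
   not part of the emulator. */
#if 0
static unsigned shrxs_model(unsigned long long *arg, unsigned nr)
{
	unsigned long long val = *arg;
	unsigned eax, sticky;

	if (nr == 0) {
		eax = 0;
		sticky = 0;
	} else if (nr < 32) {
		eax = (unsigned)(val << (32 - nr));	/* bits shifted out of arg */
		sticky = (eax & 0x7fffffff) != 0;	/* all of them except the top one */
	} else if (nr < 96) {
		/* bits that fell below eax (none when nr == 32) */
		unsigned long long below = (nr == 32) ? 0 : val << (96 - nr);

		eax = (unsigned)(val >> (nr - 32));
		sticky = ((eax & 0x7fffffff) != 0) || (below != 0);
	} else {
		eax = 0;
		sticky = (val != 0);			/* the whole arg was shifted out */
	}

	*arg = (nr < 64) ? val >> nr : 0;
	return (eax & ~0xffu) | sticky;			/* low 8 bits carry the flag */
}
#endif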