diff options
Diffstat (limited to 'libmp/Plan9-power/mpvecdigmulsub.s')
| -rw-r--r-- | libmp/Plan9-power/mpvecdigmulsub.s | 66 |
1 files changed, 66 insertions, 0 deletions
diff --git a/libmp/Plan9-power/mpvecdigmulsub.s b/libmp/Plan9-power/mpvecdigmulsub.s new file mode 100644 index 00000000..bfa4b91a --- /dev/null +++ b/libmp/Plan9-power/mpvecdigmulsub.s @@ -0,0 +1,66 @@ +#define BDNZ BC 16,0, +#define BDNE BC 0,2, +#define BLT BC 0xC,0, + +/* + * mpvecdigmulsub(mpdigit *b, int n, mpdigit m, mpdigit *p) + * + * p -= b*m + * + * each step looks like: + * hi,lo = m*b[i] + * lo += oldhi + carry + * hi += carry + * p[i] += lo + * oldhi = hi + * + * the registers are: + * b = R3 + * n = R4 + * m = R5 + * p = R6 + * i = R7 + * hi = R8 - constrained by hardware + * lo = R9 - constrained by hardware + * oldhi = R10 + * tmp = R11 + * borrow = R12 + * + */ +TEXT mpvecdigmulsub(SB),$0 + + MOVW n+4(FP),R10 + MOVW R10,CTR + MOVW m+8(FP),R5 + MOVW p+12(FP),R6 + SUB $4, R3 /* pre decrement for MOVWU's */ + SUBC $4, R6 /* pre decrement for MOVWU's and set carry */ + MOVW XER,R12 + + MOVW R0, R10 + +_mulsubloop: + MOVWU 4(R3),R9 /* lo = b[i] */ + MOVW 4(R6),R11 /* tmp = p[i] */ + MULHWU R9,R5,R8 /* hi = (b[i] * m)>>32 */ + MULLW R9,R5,R9 /* lo = b[i] * m */ + ADDC R10,R9 /* lo += oldhi */ + ADDE R0,R8 /* hi += carry */ + MOVW R12,XER + SUBE R9,R11 /* tmp -= lo */ + MOVW XER,R12 + MOVWU R11,4(R6) /* p[i] = tmp */ + MOVW R8,R10 /* oldhi = hi */ + BDNZ _mulsubloop + + MOVW 4(R6),R11 /* tmp = p[i] */ + MOVW R12,XER + SUBE R10,R11 /* tmp -= lo */ + MOVWU R11,4(R6) /* p[i] = tmp */ + + /* return -1 if the result was negative, +1 otherwise */ + SUBECC R0,R0,R3 + BLT _mulsub2 + MOVW $1,R3 +_mulsub2: + RETURN |
