diff options
Diffstat (limited to 'libmp/Plan9-power')
| -rw-r--r-- | libmp/Plan9-power/mkfile | 20 | ||||
| -rw-r--r-- | libmp/Plan9-power/mpvecadd.s | 61 | ||||
| -rw-r--r-- | libmp/Plan9-power/mpvecdigmuladd.s | 56 | ||||
| -rw-r--r-- | libmp/Plan9-power/mpvecdigmulsub.s | 66 | ||||
| -rw-r--r-- | libmp/Plan9-power/mpvecsub.s | 57 |
5 files changed, 260 insertions, 0 deletions
diff --git a/libmp/Plan9-power/mkfile b/libmp/Plan9-power/mkfile new file mode 100644 index 00000000..9f5db59f --- /dev/null +++ b/libmp/Plan9-power/mkfile @@ -0,0 +1,20 @@ +objtype=power +OBJTYPE=$objtype +<../../mkconfig + +LIB=libmp.a +SFILES=\ + mpvecadd.s\ + mpvecdigmuladd.s\ + mpvecdigmulsub.s\ + mpvecsub.s\ + +HFILES=$ROOT/$SYSTARG/$OBJTYPE/include/u.h $ROOT/include/mp.h ../port/dat.h + +OFILES=${SFILES:%.s=%.$O} + +UPDATE=mkfile\ + $HFILES\ + $SFILES\ + +<$ROOT/mkfiles/mksyslib-$SHELLTYPE diff --git a/libmp/Plan9-power/mpvecadd.s b/libmp/Plan9-power/mpvecadd.s new file mode 100644 index 00000000..abbfe53d --- /dev/null +++ b/libmp/Plan9-power/mpvecadd.s @@ -0,0 +1,61 @@ +#define BDNZ BC 16,0, +#define BDNE BC 0,2, + +/* + * mpvecadd(mpdigit *a, int alen, mpdigit *b, int blen, mpdigit *sum) + * + * sum[0:alen] = a[0:alen-1] + b[0:blen-1] + * + * prereq: alen >= blen, sum has room for alen+1 digits + * + * R3 == a (first arg passed in R3) + * R4 == alen, then alen - blen (count for the second loop) + * R5 == b + * R6 == blen + * R7 == sum + * R8 == temporary (digit of a; final carry) + * R9 == temporary (digit of b, then sum digit) + */ +TEXT mpvecadd(SB),$-4 + + MOVW alen+4(FP), R4 + MOVW b+8(FP), R5 + MOVW blen+12(FP), R6 + MOVW sum+16(FP), R7 + SUB R6, R4 /* R4 = alen - blen, the count for the second loop */ + SUB $4, R3 /* pre decrement for MOVWU's */ + SUB $4, R5 /* pre decrement for MOVWU's */ + SUB $4, R7 /* pre decrement for MOVWU's */ + MOVW R0, XER /* zero carry going in (R0 is used as zero throughout) */ + + /* if blen == 0, don't need to add it in */ + CMP R0, R6 + BEQ _add1 + + /* sum[0:blen-1],carry = a[0:blen-1] + b[0:blen-1] */ + MOVW R6, CTR +_addloop1: + MOVWU 4(R3), R8 + MOVWU 4(R5), R9 + ADDE R8, R9 + MOVWU R9, 4(R7) + BDNZ _addloop1 + +_add1: + /* if alen == blen, we're done */ + CMP R0, R4 + BEQ _addend + + /* sum[blen:alen-1],carry = a[blen:alen-1] + 0 + carry */ + MOVW R4, CTR +_addloop2: + MOVWU 4(R3), R8 + ADDE R0, R8 + MOVWU R8, 4(R7) + BDNZ _addloop2 + + /* sum[alen] = carry */ +_addend: + ADDE R0, R0, R8 /* R8 = 0 + 0 + carry */ + MOVW R8, 4(R7) + RETURN diff --git 
a/libmp/Plan9-power/mpvecdigmuladd.s b/libmp/Plan9-power/mpvecdigmuladd.s new file mode 100644 index 00000000..b86f7ec7 --- /dev/null +++ b/libmp/Plan9-power/mpvecdigmuladd.s @@ -0,0 +1,56 @@ +#define BDNZ BC 16,0, +#define BDNE BC 0,2, + +/* + * mpvecdigmuladd(mpdigit *b, int n, mpdigit m, mpdigit *p) + * + * p += b*m + * + * each step looks like: + * hi,lo = m*b[i] + * lo += oldhi + * hi += carry + * p[i] += lo, hi += carry + * oldhi = hi + * + * the registers are: + * b = R3 + * n = R4 + * m = R5 + * p = R6 + * i = R7 (not referenced by the code below) + * hi = R8 - constrained by hardware + * lo = R9 - constrained by hardware + * oldhi = R10 + * tmp = R11 + * + */ +TEXT mpvecdigmuladd(SB),$0 + + MOVW n+4(FP),R4 + MOVW m+8(FP),R5 + MOVW p+12(FP),R6 + SUB $4, R3 /* pre decrement for MOVWU's */ + SUB $4, R6 /* pre decrement for MOVWU's */ + + MOVW R0, R10 /* oldhi = 0 */ + MOVW R0, XER /* clear XER (carry) */ + MOVW R4, CTR /* loop n times */ +_muladdloop: + MOVWU 4(R3),R9 /* lo = b[i] */ + MOVW 4(R6),R11 /* tmp = p[i] */ + MULHWU R9,R5,R8 /* hi = (b[i] * m)>>32 */ + MULLW R9,R5,R9 /* lo = b[i] * m */ + ADDC R10,R9 /* lo += oldhi */ + ADDE R0,R8 /* hi += carry */ + ADDC R9,R11 /* tmp += lo */ + ADDE R0,R8 /* hi += carry */ + MOVWU R11,4(R6) /* p[i] = tmp */ + MOVW R8,R10 /* oldhi = hi */ + BDNZ _muladdloop + + MOVW 4(R6),R11 /* tmp = p[n] */ + ADDC R10,R11 /* tmp += oldhi */ + MOVWU R11,4(R6) /* p[n] = tmp */ + + RETURN diff --git a/libmp/Plan9-power/mpvecdigmulsub.s b/libmp/Plan9-power/mpvecdigmulsub.s new file mode 100644 index 00000000..bfa4b91a --- /dev/null +++ b/libmp/Plan9-power/mpvecdigmulsub.s @@ -0,0 +1,66 @@ +#define BDNZ BC 16,0, +#define BDNE BC 0,2, +#define BLT BC 0xC,0, + +/* + * mpvecdigmulsub(mpdigit *b, int n, mpdigit m, mpdigit *p) + * + * p -= b*m + * + * each step looks like: + * hi,lo = m*b[i] + * lo += oldhi + * hi += carry + * p[i] -= lo + borrow + * oldhi = hi + * + * the registers are: + * b = R3 + * n = CTR (loaded through R10) + * m = R5 + * p = R6 + * i = R7 (not referenced by the code below) + * hi = R8 - constrained by hardware + * lo = R9 - constrained by hardware + * oldhi = R10 + * tmp = R11 + * borrow 
= R12 (saved copy of XER) + * + */ +TEXT mpvecdigmulsub(SB),$0 + + MOVW n+4(FP),R10 + MOVW R10,CTR + MOVW m+8(FP),R5 + MOVW p+12(FP),R6 + SUB $4, R3 /* pre decrement for MOVWU's */ + SUBC $4, R6 /* pre decrement for MOVWU's and set carry */ + MOVW XER,R12 /* borrow = XER as left by the SUBC above */ + + MOVW R0, R10 /* oldhi = 0 */ + +_mulsubloop: + MOVWU 4(R3),R9 /* lo = b[i] */ + MOVW 4(R6),R11 /* tmp = p[i] */ + MULHWU R9,R5,R8 /* hi = (b[i] * m)>>32 */ + MULLW R9,R5,R9 /* lo = b[i] * m */ + ADDC R10,R9 /* lo += oldhi */ + ADDE R0,R8 /* hi += carry */ + MOVW R12,XER /* restore borrow; the adds above changed the carry */ + SUBE R9,R11 /* tmp -= lo + borrow */ + MOVW XER,R12 /* save borrow for the next digit */ + MOVWU R11,4(R6) /* p[i] = tmp */ + MOVW R8,R10 /* oldhi = hi */ + BDNZ _mulsubloop + + MOVW 4(R6),R11 /* tmp = p[n] */ + MOVW R12,XER /* restore borrow */ + SUBE R10,R11 /* tmp -= oldhi + borrow */ + MOVWU R11,4(R6) /* p[n] = tmp */ + + /* return -1 if the result was negative, +1 otherwise */ + SUBECC R0,R0,R3 /* R3 = 0 - 0 - borrow, setting the condition codes */ + BLT _mulsub2 /* negative: return the value already in R3 */ + MOVW $1,R3 +_mulsub2: + RETURN diff --git a/libmp/Plan9-power/mpvecsub.s b/libmp/Plan9-power/mpvecsub.s new file mode 100644 index 00000000..c2a05bc1 --- /dev/null +++ b/libmp/Plan9-power/mpvecsub.s @@ -0,0 +1,57 @@ +#define BDNZ BC 16,0, +#define BDNE BC 0,2, + +/* + * mpvecsub(mpdigit *a, int alen, mpdigit *b, int blen, mpdigit *diff) + * + * diff[0:alen-1] = a[0:alen-1] - b[0:blen-1] + * + * prereq: alen >= blen, diff has room for alen digits + * + * R3 == a + * R4 == alen, then alen - blen (count for the second loop) + * R5 == b + * R6 == blen + * R7 == diff + * R8 == temporary + * R9 == temporary + */ +TEXT mpvecsub(SB),$-4 + + MOVW alen+4(FP),R4 + MOVW b+8(FP),R5 + MOVW blen+12(FP),R6 + MOVW diff+16(FP),R7 + SUB R6, R4 /* R4 = alen - blen, the count for the second loop */ + SUB $4, R3 /* pre decrement for MOVWU's */ + SUB $4, R5 /* pre decrement for MOVWU's */ + SUBC $4, R7 /* pre decrement for MOVWU's and set carry */ + + /* skip the first loop if blen is zero */ + CMP R0,R6 + BEQ _sub1 + + /* diff[0:blen-1],borrow = a[0:blen-1] - b[0:blen-1] */ + MOVW R6, CTR +_subloop1: + MOVWU 4(R3), R8 + MOVWU 4(R5), R9 + SUBE R9, R8, R8 + MOVWU R8, 4(R7) + BDNZ _subloop1 + +_sub1: + /* skip 
the second loop if alen == blen */ + CMP R0, R4 + BEQ _subend + + /* diff[blen:alen-1] = a[blen:alen-1] - 0 - borrow */ + MOVW R4, CTR +_subloop2: + MOVWU 4(R3), R8 + SUBE R0, R8 + MOVWU R8, 4(R7) + BDNZ _subloop2 +_subend: + RETURN + |
